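minor: play PlayerO with judge_llm in tic-tac-toe, update Werewolf docs to 7 players, un-alias test imports, expand ChatRoom.post() docstring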

dolaameng · dolaameng · commit 2e849317d463 · 2026-05-28T22:04:27.000Z
diff --git a/documentation/examples/chatroom_tic_tac_toe.py b/documentation/examples/chatroom_tic_tac_toe.py
@@ -115,6 +115,7 @@ def _check_game_state(self):
 )
 def run_tic_tac_toe(
     llm: kbench.LLMChat,
+    judge_llm: kbench.LLMChat,
 ) -> dict:
     """Runs Tic-Tac-Toe using ChatRoom.
 
@@ -126,7 +127,7 @@ def run_tic_tac_toe(
     After ChatRoom:
         - Game engine is an Actor that posts board state
         - Players see the full history (own moves as "assistant", peer as "user")
-        - talk(schema=TicTacToeMove) returns structured output directly
+        - reply(schema=TicTacToeMove) returns structured output directly
     """
     game = TicTacToe()
     game_engine = actors.Actor(name="Game", role="user", avatar="🎮")
@@ -148,7 +149,7 @@ def run_tic_tac_toe(
         system_prompt="You are player X in Tic-Tac-Toe. When it's your turn, respond with your move (row and col, 0-indexed).",
     )
     player_o = room.add_participant(
-        llm,
+        judge_llm,
         name="PlayerO",
         avatar="⭕",
         system_prompt="You are player O in Tic-Tac-Toe. When it's your turn, respond with your move (row and col, 0-indexed).",
@@ -175,8 +176,6 @@ def run_tic_tac_toe(
 
 # %%
 
-model = kbench.llm
-run_tic_tac_toe.run(llm=model)
-
+run_tic_tac_toe.run(llm=kbench.llm, judge_llm=kbench.judge_llm)
 
 # %%
diff --git a/documentation/examples/chatroom_werewolf.py b/documentation/examples/chatroom_werewolf.py
@@ -14,15 +14,15 @@
 
 """Werewolf: A Benchmark for Social Deduction, Private Channels, and Deception
 
-This example showcases a complete, 4-player game of Werewolf (also known as Mafia) using ChatRoom.
+This example showcases a complete, 7-player game of Werewolf (also known as Mafia) using ChatRoom.
 Werewolf is the gold standard of multi-agent social evaluation because it requires:
 1. Information Asymmetry: Roles are secret (Werewolves know their team; Villagers do not).
 2. Private Channels: Werewolves converse secretly at night to target a victim.
 3. Deception vs. Deduction: Werewolves must lie and blend in; Villagers must audit arguments.
 
 We represent:
 - Alice and Bob as Werewolves (Secret Werewolf Team)
-- Charlie and David as Villagers (Secret Villager Team)
+- Charlie, David, Eve, Frank, and Grace as Villagers (Secret Villager Team)
 """
 
 # %%
@@ -144,7 +144,7 @@ def count_votes(vote_dict: dict) -> str | None:
             wolf_chat = room.private_channel(active_wolves, name="Werewolf Night Chat")
             victim = None
 
-            # Enter the private werewolf channel. Charlie and David are blind to this context.
+            # Enter the private werewolf channel. The villagers are blind to this context.
             with wolf_chat:
                 wolf_chat.post(
                     "Werewolves, discuss and pick a Villager to eliminate tonight."
diff --git a/golden_tests/test_cookbook_examples.py b/golden_tests/test_cookbook_examples.py
@@ -34,7 +34,7 @@
 
 # %%
 import base64
-import dataclasses as _dc
+import dataclasses
 import os
 import tempfile
 from contextlib import contextmanager
@@ -47,9 +47,13 @@
 from pydantic import BaseModel, Field
 
 import kaggle_benchmarks as kbench
-from kaggle_benchmarks import actors as _actors
-from kaggle_benchmarks.chats import ChatRoom as _ChatRoom
-from kaggle_benchmarks.content_types import audios, images, videos
+from kaggle_benchmarks.actors import Actor
+from kaggle_benchmarks.chats import ChatRoom
+from kaggle_benchmarks.content_types import (
+    audios,
+    images,
+    videos,
+)
 
 # Models to be tested as the primary subject.
 TEST_LLM_NAMES = {
@@ -308,13 +312,13 @@ def test_extract_pydantic(llm):
 # --- Test Case: Structured Output (composite pydantic Extraction) ---
 
 
-class Actor(BaseModel):
+class FriendsActor(BaseModel):
     actor_name: str
     role_name: str
 
 
 class Casting(BaseModel):
-    actors: list[Actor]
+    actors: list[FriendsActor]
 
 
 # Known failures (genai): gpt-5.5 — MP sends empty json_schema.name.
@@ -1070,7 +1074,7 @@ def test_tool_with_schema_output(llm):
 @kbench.task()
 def test_chatroom_add_participant(llm):
     """Tests that the same LLM added twice yields independent participants."""
-    room = _ChatRoom(
+    room = ChatRoom(
         system_prompt="A quick Q&A between two experts.",
         name="Host",
     )
@@ -1132,7 +1136,7 @@ def test_chatroom_add_participant(llm):
 # within a ChatRoom context, combining multi-participant rooms with schema.
 
 
-@_dc.dataclass(frozen=True)
+@dataclasses.dataclass(frozen=True)
 class _CityFact:
     """A structured fact about a city."""
 
@@ -1145,12 +1149,12 @@ class _CityFact:
 @kbench.task()
 def test_chatroom_talk_structured_output(llm):
     """Tests that reply(schema=) works inside a ChatRoom."""
-    room = _ChatRoom(
+    room = ChatRoom(
         system_prompt="A geography quiz game.",
         name="QuizMaster",
     )
 
-    host = _actors.Actor(name="QuizMaster", role="user", avatar="🎯")
+    host = Actor(name="QuizMaster", role="user", avatar="🎯")
     room.add_participant(host)
     player = room.add_participant(
         llm,
@@ -1190,7 +1194,7 @@ def test_chatroom_talk_structured_output(llm):
 @kbench.task()
 def test_chatroom_multi_turn(llm):
     """Tests multi-turn conversation: 2 rounds of moderator prompt → LLM reply."""
-    room = _ChatRoom(
+    room = ChatRoom(
         system_prompt="A two-round trivia game.",
         name="Trivia",
     )
@@ -1241,7 +1245,7 @@ def test_chatroom_multi_turn(llm):
 @kbench.task()
 def test_chatroom_private_channel(llm):
     """Tests that private_channel messages are invisible to non-members."""
-    room = _ChatRoom(
+    room = ChatRoom(
         system_prompt="A team coordination exercise with a secret planning phase.",
         name="Coordinator",
     )
@@ -1303,9 +1307,9 @@ def test_chatroom_private_channel(llm):
 @kbench.task()
 def test_chatroom_actor_talk(llm):
     """Tests that a non-LLM Actor can post messages that LLMs respond to."""
-    game = _actors.Actor(name="GameEngine", role="user", avatar="🎮")
+    game = Actor(name="GameEngine", role="user", avatar="🎮")
 
-    room = _ChatRoom(
+    room = ChatRoom(
         system_prompt="A simple number guessing game. The GameEngine posts a number, the Player guesses.",
         name="GameEngine",
     )
diff --git a/src/kaggle_benchmarks/chats.py b/src/kaggle_benchmarks/chats.py
@@ -363,15 +363,30 @@ def __exit__(self, *exc):
     def post(
         self, message: str, visible_to: list["actors.Actor"] | None = None
     ) -> Message:
-        """Post a narrator message to the room.
+        """Post a system/narrator directive to the room.
 
-        Uses the room's ``_narrator`` actor (whose name matches the room name)
-        so the LLM sees consistent sender identity.
+        Messages posted via ``post()`` come from the room's internal narrator
+        — an actor that is **not** a registered participant. The roster
+        explicitly tells LLMs to treat these messages as system instructions
+        rather than peer speech, reducing the chance of LLMs "arguing back"
+        at directives.
+
+        **When to use ``post()`` vs ``actor.say()``:**
+
+        - Use ``post()`` for structural directives that no specific character
+          owns: phase transitions, rules, topic prompts.
+          Example: ``room.post("--- Phase 2: Rebuttals ---")``
+
+        - Use ``actor.say()`` when a **named participant** is speaking and
+          you want the message attributed to that identity in the transcript.
+          Example: ``game_engine.say(f"Board:\\n{board}")``
 
         Args:
             message: The message text.
-            visible_to: Optional list of Actors who can see this message.
-                If None, all participants can see it.
+            visible_to: Optional list of participants who can see this message.
+                If None (default), all participants can see it. Useful for
+                sending private instructions to specific participants without
+                creating a full ``private_channel()``.
         """
         msg = Message(sender=self._narrator, content=message)
         if visible_to is not None: