From fed1cf48e6276068bc680317028021a6016d6454 Mon Sep 17 00:00:00 2001 From: antoniogois Date: Tue, 6 Jan 2026 13:34:35 +0000 Subject: [PATCH] enable payoff matrix to process simultaneous actions, adapt gametheoryGM and include example usage --- .../components/game_master/next_acting.py | 75 ++++ .../components/game_master/payoff_matrix.py | 69 +++- .../game_theoretic_and_dramaturgic.py | 42 +- examples/iterated_prisoners_dilemma.ipynb | 385 ++++++++++++++++++ 4 files changed, 550 insertions(+), 21 deletions(-) create mode 100644 examples/iterated_prisoners_dilemma.ipynb diff --git a/concordia/components/game_master/next_acting.py b/concordia/components/game_master/next_acting.py index ebd1eba5..c88b6346 100644 --- a/concordia/components/game_master/next_acting.py +++ b/concordia/components/game_master/next_acting.py @@ -418,6 +418,81 @@ def set_state(self, state: entity_component.ComponentState) -> None: self._counter = state['counter'] +class NextActingAllEntitiesFromSceneSpec( + entity_component.ContextComponent, entity_component.ComponentWithLogging +): + """A component that makes all scene participants act simultaneously. + + Unlike NextActingFromSceneSpec which cycles through participants one at a time, + this component returns all participants to act at once. Designed for use with + simultaneous engines. + """ + + def __init__( + self, + memory_component_key: str = ( + memory_component.DEFAULT_MEMORY_COMPONENT_KEY + ), + scene_tracker_component_key: str = ( + scene_tracker_component.DEFAULT_SCENE_TRACKER_COMPONENT_KEY + ), + pre_act_label: str = DEFAULT_NEXT_ACTING_PRE_ACT_LABEL, + ): + """Initializes the component. + + Args: + scene_tracker_component_key: The name of the scene tracker component. + pre_act_label: Prefix to add to the output of the component when called in + `pre_act`. + """ + super().__init__() + self._memory_component_key = memory_component_key + self._scene_tracker_component_key = scene_tracker_component_key + self._pre_act_label = pre_act_label + + def _get_named_component_pre_act_value(self, component_name: str) -> str: + """Returns the pre-act value of a named component of the parent entity.""" + return ( + self.get_entity().get_component( + component_name, type_=action_spec_ignored.ActionSpecIgnored + ).get_pre_act_value() + ) + + def _get_current_scene_participants(self) -> Sequence[str]: + scene_tracker = self.get_entity().get_component( + self._scene_tracker_component_key, + type_=scene_tracker_component.SceneTracker, + ) + return scene_tracker.get_participants() + + def pre_act( + self, + action_spec: entity_lib.ActionSpec, + ) -> str: + result = '' + if action_spec.output_type == entity_lib.OutputType.NEXT_ACTING: + scene_participants = self._get_current_scene_participants() + result = ','.join(scene_participants) # All participants at once + return result + + def get_currently_active_player(self) -> str | None: + """Not applicable for this component as all players are always active.""" + raise RuntimeError( + 'Error in NextActingAllEntitiesFromSceneSpec: ' + 'get_currently_active_player() is not applicable for this component ' + 'as all players are always active. You might be using a component ' + 'that calls this method in a simultaneous environment.' + ) + + def get_state(self) -> entity_component.ComponentState: + """Returns the state of the component.""" + return {} + + def set_state(self, state: entity_component.ComponentState) -> None: + """Sets the state of the component.""" + pass + + class NextActionSpec( entity_component.ContextComponent, entity_component.ComponentWithLogging ): diff --git a/concordia/components/game_master/payoff_matrix.py b/concordia/components/game_master/payoff_matrix.py index 8266a7f3..69ea74e6 100644 --- a/concordia/components/game_master/payoff_matrix.py +++ b/concordia/components/game_master/payoff_matrix.py @@ -16,6 +16,7 @@ from collections.abc import Callable, Mapping, Sequence import copy +import re from concordia.agents import entity_agent from concordia.components.agent import memory as memory_component @@ -33,6 +34,8 @@ CollectiveActionProductionFunction = Callable[[int], float] PlayersT = Sequence[entity_agent.EntityAgent] +PUTATIVE_EVENT_TAG = event_resolution_component.PUTATIVE_EVENT_TAG + class PayoffMatrix( entity_component.ContextComponent, entity_component.ComponentWithLogging @@ -45,6 +48,7 @@ def __init__( acting_player_names: Sequence[str], action_to_scores: Callable[[Mapping[str, str]], Mapping[str, float]], scores_to_observation: Callable[[Mapping[str, float]], Mapping[str, str]], + acting_order: str = 'sequential', event_resolution_component_key: str = ( switch_act.DEFAULT_RESOLUTION_COMPONENT_KEY ), @@ -70,6 +74,9 @@ def __init__( a dictionary of scores for each player scores_to_observation: function that maps a dictionary of scores for each player to a dictionary of observations for each player. + acting_order: Order in which players act. Options are 'sequential' + (default, actions extracted from EventResolution component) or + 'simultaneous' (actions collected from memory). event_resolution_component_key: The key of the event resolution component. observation_component_key: The key of the observation component to send observations to players. If None, no observations will be sent. @@ -80,8 +87,15 @@ def __init__( pre_act_label: Prefix to add to the output of the component when called in `pre_act`. verbose: whether to print the full update chain of thought or not + + Raises: + ValueError: If acting_order is not 'sequential' or 'simultaneous'. """ + if acting_order not in ('sequential', 'simultaneous'): + raise ValueError(f'Unsupported acting order: {acting_order}') + self._pre_act_label = pre_act_label + self._acting_order = acting_order self._model = model self._observation_component_key = observation_component_key @@ -116,6 +130,39 @@ def _get_current_scene_participants(self) -> Sequence[str]: return scene_tracker_component.get_participants() return self._acting_player_names + def _extract_actions_from_memory(self) -> None: + """Extract the last action for each scene participant from memory. + + Used in simultaneous mode to collect all players' actions at once. + """ + if not self._memory_component_key: + return + + memory = self.get_entity().get_component( + self._memory_component_key, + type_=memory_component.Memory, + ) + + # Scan for the most recent putative event + suggestions = memory.scan(selector_fn=lambda x: PUTATIVE_EVENT_TAG in x) + if not suggestions: + return + + # Extract the action string from the most recent suggestion + putative_action = suggestions[-1][ + suggestions[-1].find(PUTATIVE_EVENT_TAG) + len(PUTATIVE_EVENT_TAG) : + ] + + # Extract all "PlayerName: Action" pairs using regex + # Pattern matches "word: text" up to next word: or end of string + # pattern = r'(\w+):\s+(.+?)(?=\s+\w+:|$)' + all_players = self._get_current_scene_participants() + pattern = rf'({"|".join(map(re.escape, all_players))}):\s*(.*?)\s*(?=(?:{"|".join(map(re.escape, all_players))}):|$)' + found_actions = dict(re.findall(pattern, putative_action)) + + for player_name in found_actions: + self._partial_joint_action[player_name] = found_actions[player_name] + def _joint_action_is_complete(self, joint_action: Mapping[str, str]) -> bool: for acting_player_name in self._get_current_scene_participants(): if joint_action[acting_player_name] is None: @@ -139,15 +186,19 @@ def post_act( is_action_complete = False if self._latest_action_spec_output_type == entity_lib.OutputType.RESOLVE: - event_resolution = self.get_entity().get_component( - self._event_resolution_component_key, - type_=event_resolution_component.EventResolution, - ) - - player_name = event_resolution.get_active_entity_name() - choice = event_resolution.get_putative_action() - if player_name in self._acting_player_names and choice: - self._partial_joint_action[player_name] = choice + if self._acting_order == 'simultaneous': + # Simultaneous mode: extract all players' actions from memory + self._extract_actions_from_memory() + elif self._acting_order == 'sequential': + # Sequential mode: extract single player action from EventResolution + event_resolution = self.get_entity().get_component( + self._event_resolution_component_key, + type_=event_resolution_component.EventResolution, + ) + player_name = event_resolution.get_active_entity_name() + choice = event_resolution.get_putative_action() + if player_name in self._acting_player_names and choice: + self._partial_joint_action[player_name] = choice # Check if all players have acted so far in the current stage game. joint_action = self._partial_joint_action.copy() diff --git a/concordia/prefabs/game_master/game_theoretic_and_dramaturgic.py b/concordia/prefabs/game_master/game_theoretic_and_dramaturgic.py index f7f102fd..44cbdad8 100644 --- a/concordia/prefabs/game_master/game_theoretic_and_dramaturgic.py +++ b/concordia/prefabs/game_master/game_theoretic_and_dramaturgic.py @@ -129,6 +129,7 @@ class GameMaster(prefab_lib.Prefab): 'scenes': (), 'action_to_scores': _default_action_to_scores, 'scores_to_observation': _default_scores_to_observation, + 'acting_order': 'sequential', } ) entities: ( @@ -211,10 +212,19 @@ def build( gm_components.next_game_master.DEFAULT_NEXT_GAME_MASTER_COMPONENT_KEY ) - next_actor = gm_components.next_acting.NextActingFromSceneSpec( - memory_component_key=actor_components.memory.DEFAULT_MEMORY_COMPONENT_KEY, - scene_tracker_component_key=scene_tracker_key, - ) + acting_order = self.params.get('acting_order', 'sequential') + if acting_order == 'simultaneous': + next_actor = gm_components.next_acting.NextActingAllEntitiesFromSceneSpec( + memory_component_key=actor_components.memory.DEFAULT_MEMORY_COMPONENT_KEY, + scene_tracker_component_key=scene_tracker_key, + ) + elif acting_order == 'sequential': + next_actor = gm_components.next_acting.NextActingFromSceneSpec( + memory_component_key=actor_components.memory.DEFAULT_MEMORY_COMPONENT_KEY, + scene_tracker_component_key=scene_tracker_key, + ) + else: + raise ValueError(f'Unsupported acting order: {acting_order}') next_action_spec = gm_components.next_acting.NextActionSpecFromSceneSpec( memory_component_key=actor_components.memory.DEFAULT_MEMORY_COMPONENT_KEY, @@ -227,6 +237,7 @@ def build( acting_player_names=player_names, action_to_scores=action_to_scores, scores_to_observation=scores_to_observation, + acting_order=acting_order, scene_tracker_component_key=scene_tracker_key, verbose=True, ) @@ -242,11 +253,15 @@ def build( actor_components.observation.DEFAULT_OBSERVATION_COMPONENT_KEY, ] - event_resolution = gm_components.event_resolution.EventResolution( - model=model, - event_resolution_steps=event_resolution_steps, - components=event_resolution_components, - ) + # Only create event resolution for sequential acting + # (simultaneous acting has no single active player to attribute events to) + event_resolution = None + if acting_order == 'sequential': + event_resolution = gm_components.event_resolution.EventResolution( + model=model, + event_resolution_steps=event_resolution_steps, + components=event_resolution_components, + ) scene_tracker = gm_components.scene_tracker.SceneTracker( model=model, scenes=scenes, @@ -276,11 +291,14 @@ def build( next_action_spec ), payoff_matrix_key: payoff_matrix, - gm_components.switch_act.DEFAULT_RESOLUTION_COMPONENT_KEY: ( - event_resolution - ), } + # Only add event resolution for sequential acting + if event_resolution is not None: + components_of_game_master[gm_components.switch_act.DEFAULT_RESOLUTION_COMPONENT_KEY] = ( + event_resolution + ) + component_order = list(components_of_game_master.keys()) act_component = gm_components.switch_act.SwitchAct( diff --git a/examples/iterated_prisoners_dilemma.ipynb b/examples/iterated_prisoners_dilemma.ipynb new file mode 100644 index 00000000..9b2b7d52 --- /dev/null +++ b/examples/iterated_prisoners_dilemma.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7b54ded4", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70f774d9", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Colab-specific setup (use a CodeSpace to avoid the need for this).\n", + "try:\n", + " %env COLAB_RELEASE_TAG\n", + "except:\n", + " pass # Not running in colab.\n", + "else:\n", + " %pip install --ignore-requires-python --requirement 'https://raw.githubusercontent.com/google-deepmind/concordia/main/examples/requirements.in' 'git+https://github.com/google-deepmind/concordia.git#egg=gdm-concordia'\n", + " %pip list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51dfe77a", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Imports\n", + "\n", + "from concordia.contrib import language_models as language_model_utils\n", + "import concordia.prefabs.entity as entity_prefabs\n", + "import concordia.prefabs.game_master as game_master_prefabs\n", + "from concordia.prefabs.simulation import generic as simulation\n", + "from concordia.typing import prefab as prefab_lib\n", + "from concordia.typing import entity as entity_lib\n", + "from concordia.typing import scene as scene_lib\n", + "from concordia.utils import helper_functions\n", + "from IPython import display\n", + "import numpy as np\n", + "import sentence_transformers\n", + "from concordia.environment.engines import simultaneous" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6739ce18", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Language Model Selection: provide key or select DISABLE_LANGUAGE_MODEL\n", + "\n", + "# By default this colab uses models via an external API so you must provide an\n", + "# API key. TogetherAI offers open weights models from all sources.\n", + "\n", + "API_KEY = '' # @param {type: 'string'}\n", + "# See concordia/language_model/utils.py\n", + "API_TYPE = 'openai' # e.g. 'together_ai' or 'openai'.\n", + "MODEL_NAME = ( # for API_TYPE = 'together_ai', we recommend MODEL_NAME = 'google/gemma-3-27b-it'\n", + " 'gpt-5'\n", + ")\n", + "# To debug without spending money on API calls, set DISABLE_LANGUAGE_MODEL=True\n", + "DISABLE_LANGUAGE_MODEL = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cada3b6", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Use the selected language model\n", + "\n", + "# Note that it is also possible to use local models or other API models,\n", + "# simply replace this cell with the correct initialization for the model\n", + "# you want to use.\n", + "\n", + "if not DISABLE_LANGUAGE_MODEL and not API_KEY:\n", + " raise ValueError('API_KEY is required.')\n", + "\n", + "model = language_model_utils.language_model_setup(\n", + " api_type=API_TYPE,\n", + " model_name=MODEL_NAME,\n", + " api_key=API_KEY,\n", + " disable_language_model=DISABLE_LANGUAGE_MODEL,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d39cf628", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Setup sentence encoder\n", + "\n", + "if DISABLE_LANGUAGE_MODEL:\n", + " embedder = lambda _: np.ones(3)\n", + "else:\n", + " st_model = sentence_transformers.SentenceTransformer(\n", + " 'sentence-transformers/all-mpnet-base-v2')\n", + " embedder = lambda x: st_model.encode(x, show_progress_bar=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "945c9020", + "metadata": {}, + "outputs": [], + "source": [ + "test = model.sample_text(\n", + " 'Is societal and technological progress like getting a clearer picture of '\n", + " 'something true and deep?')\n", + "print(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f83d55c2", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Load prefabs from packages to make the specific palette to use here.\n", + "\n", + "prefabs = {\n", + " **helper_functions.get_package_classes(entity_prefabs),\n", + " **helper_functions.get_package_classes(game_master_prefabs),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bf10894", + "metadata": {}, + "outputs": [], + "source": [ + "#@title Print menu of prefabs\n", + "\n", + "display.display(\n", + " display.Markdown(helper_functions.print_pretty_prefabs(prefabs)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db8ac873", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Define payoff functions for prisoner's dilemma\n", + "\n", + "def ipd_action_to_scores(joint_action):\n", + " \"\"\"Map joint action to scores using standard PD payoffs: T=5, R=3, P=1, S=0.\"\"\"\n", + " alice_action = joint_action.get('Alice', 'defect')\n", + " bob_action = joint_action.get('Bob', 'defect')\n", + "\n", + " if alice_action == 'cooperate' and bob_action == 'cooperate':\n", + " return {'Alice': 3.0, 'Bob': 3.0} # Reward (R)\n", + " elif alice_action == 'defect' and bob_action == 'defect':\n", + " return {'Alice': 1.0, 'Bob': 1.0} # Punishment (P)\n", + " elif alice_action == 'cooperate' and bob_action == 'defect':\n", + " return {'Alice': 0.0, 'Bob': 5.0} # Sucker (S) / Temptation (T)\n", + " else: # alice defects, bob cooperates\n", + " return {'Alice': 5.0, 'Bob': 0.0} # Temptation (T) / Sucker (S)\n", + "\n", + "def ipd_scores_to_observation(scores):\n", + " \"\"\"Convert cumulative scores to observations for each player.\"\"\"\n", + " return {\n", + " name: f\"{name}'s cumulative score is now {score:.1f} points.\"\n", + " for name, score in scores.items()\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "022d7da2", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Define the scene for prisoner's dilemma decisions\n", + "\n", + "player_names = ['Alice', 'Bob']\n", + "\n", + "# Define the decision scene with binary choice\n", + "decision_scene = scene_lib.SceneTypeSpec(\n", + " name='decision',\n", + " game_master_name='default rules',\n", + " action_spec=entity_lib.choice_action_spec(\n", + " call_to_action='Does {name} cooperate or defect?',\n", + " options=['cooperate', 'defect'],\n", + " ),\n", + ")\n", + "\n", + "# Create the scene specification for 4 rounds of iterated prisoner's dilemma\n", + "scenes = [\n", + " scene_lib.SceneSpec(\n", + " scene_type=decision_scene,\n", + " participants=player_names,\n", + " num_rounds=4,\n", + " premise={\n", + " name: [\n", + " (\n", + " f\"{name} is playing an iterated prisoner's dilemma game. \"\n", + " \"In each round, both players simultaneously choose to either cooperate or defect. \"\n", + " \"The payoffs are: both cooperate = 3 points each, \"\n", + " \"both defect = 1 point each, \"\n", + " \"one cooperates while other defects = 0 points for cooperator, 5 points for defector. \"\n", + " \"The goal is to maximize cumulative points over all rounds.\"\n", + " ),\n", + " ]\n", + " for name in player_names\n", + " },\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f72d9851", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Configure instances using library components\n", + "\n", + "instances = [\n", + " prefab_lib.InstanceConfig(\n", + " prefab='basic__Entity',\n", + " role=prefab_lib.Role.ENTITY,\n", + " params={\n", + " 'name': 'Alice',\n", + " },\n", + " ),\n", + " prefab_lib.InstanceConfig(\n", + " prefab='basic__Entity',\n", + " role=prefab_lib.Role.ENTITY,\n", + " params={\n", + " 'name': 'Bob',\n", + " },\n", + " ),\n", + " prefab_lib.InstanceConfig(\n", + " prefab='game_theoretic_and_dramaturgic__GameMaster',\n", + " role=prefab_lib.Role.GAME_MASTER,\n", + " params={\n", + " 'name': 'default rules',\n", + " 'scenes': scenes,\n", + " 'action_to_scores': ipd_action_to_scores,\n", + " 'scores_to_observation': ipd_scores_to_observation,\n", + " 'acting_order': 'simultaneous',\n", + " },\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd049423", + "metadata": {}, + "outputs": [], + "source": [ + "config = prefab_lib.Config(\n", + " default_premise=(\n", + " \"Two agents, Alice and Bob, are playing an iterated prisoner's dilemma game. \"\n", + " \"In each round, both players simultaneously choose to either cooperate or defect, \"\n", + " \"without knowing what the other player will do. The payoffs are as follows: \"\n", + " \"If both cooperate, each gets 3 points (mutual cooperation reward). \"\n", + " \"If both defect, each gets 1 point (mutual defection punishment). \"\n", + " \"If one cooperates while the other defects, the cooperator gets 0 points (sucker's payoff) \"\n", + " \"and the defector gets 5 points (temptation payoff). \"\n", + " \"The game will be played for a few rounds, and cumulative payoffs will be tracked.\"\n", + " ),\n", + " default_max_steps=4, # Match number of steps needed for scene to complete\n", + " prefabs=prefabs,\n", + " instances=instances,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "08100865", + "metadata": {}, + "source": [ + "# The simulation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ada90a9", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Initialize the simulation\n", + "runnable_simulation = simulation.Simulation(\n", + " config=config,\n", + " model=model,\n", + " embedder=embedder,\n", + " engine=simultaneous.Simultaneous()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8ba6f45", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Run the simulation\n", + "raw_log = []\n", + "results_log = runnable_simulation.play(\n", + " max_steps=4, # Match number of steps needed for scene to complete\n", + " raw_log=raw_log\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3da29aca", + "metadata": {}, + "outputs": [], + "source": [ + "# @title Display the log\n", + "display.HTML(results_log)" + ] + }, + { + "cell_type": "markdown", + "id": "5b6ece59", + "metadata": {}, + "source": [ + "```\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "you may not use this file except in compliance with the License.\n", + "You may obtain a copy of the License at\n", + "\n", + " https://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software\n", + "distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "See the License for the specific language governing permissions and\n", + "limitations under the License.\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "debug_sentence_transf", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}