Adding replay tool to allow replaying matches from saved yaml files (#114)

muralx · web-flow · commit 481bbdb448ac · 2025-03-21T23:33:52.000Z
* Adding replay agent and replay tool to allow replaying matches from saved yaml files

* Adding delay between moves

* Decoupling agent registry from base class, keeping self registration for now
diff --git a/deep_quoridor/src/agents/__init__.py b/deep_quoridor/src/agents/__init__.py
@@ -4,30 +4,45 @@ class Agent:
     Given a game state, the agent should return an action.
     """
 
-    agents = {}
+    def name(self) -> str:
+        raise NotImplementedError("You must implement the name method")
 
-    def __init_subclass__(cls, **kwargs):
-        friendly_name = Agent._friendly_name(cls.__name__)
-        Agent.agents[friendly_name] = cls
+    def get_action(self, game) -> int:
+        raise NotImplementedError("You must implement the get_action method")
 
-    def name(self):
-        return Agent._friendly_name(self.__class__.__name__)
 
-    @staticmethod
-    def _friendly_name(class_name: str):
-        return class_name.replace("Agent", "").lower()
+class AgentRegistry:
+    agents = {}
 
     @staticmethod
-    def create(friendly_name: str) -> "Agent":
-        return Agent.agents[friendly_name]()
+    def create(friendly_name: str) -> Agent:
+        return AgentRegistry.agents[friendly_name]()
 
     @staticmethod
     def names():
-        return list(Agent.agents.keys())
+        return list(AgentRegistry.agents.keys())
 
-    def get_action(self, game):
-        raise NotImplementedError("You must implement the get_action method")
+    @staticmethod
+    def register(name: str, agent_class):
+        AgentRegistry.agents[name] = agent_class
+
+
+class SelfRegisteringAgent(Agent):
+    """
+    Base class for agents whose subclasses are registered in AgentRegistry automatically.
+    Given a game state, the agent should return an action.
+    """
+
+    def __init_subclass__(cls, **kwargs):
+        AgentRegistry.register(SelfRegisteringAgent._friendly_name(cls.__name__), cls)
+
+    def name(self):
+        return SelfRegisteringAgent._friendly_name(self.__class__.__name__)
+
+    @staticmethod
+    def _friendly_name(class_name: str):
+        return class_name.replace("Agent", "").lower()
 
 
-from agents.random import RandomAgent
-from agents.simple import SimpleAgent
+from agents.random import RandomAgent  # noqa: E402, F401
+from agents.simple import SimpleAgent  # noqa: E402, F401
diff --git a/deep_quoridor/src/agents/random.py b/deep_quoridor/src/agents/random.py
@@ -1,7 +1,7 @@
-from agents import Agent
+from agents import SelfRegisteringAgent
 
 
-class RandomAgent(Agent):
+class RandomAgent(SelfRegisteringAgent):
     def __init__(self):
         super().__init__()
 
diff --git a/deep_quoridor/src/agents/replay.py b/deep_quoridor/src/agents/replay.py
@@ -0,0 +1,30 @@
+from agents import Agent
+
+
+class ReplayAgent(Agent):
+    """A replay agent that plays predefined actions in sequence.
+
+    Returns actions from a predefined list in order; typically used for testing or demos.
+
+    Args:
+        name (str): Name of the recorded player; name() returns "replay-<name>".
+        predefined_actions (list[int]): Predefined actions to be played in sequence.
+
+    Attributes:
+        actions (list[int]): The list of predefined actions.
+        action_index (int): Current index in the actions list.
+    """
+
+    def __init__(self, name: str, predefined_actions: list[int]):
+        super().__init__()
+        self.actions = predefined_actions
+        self.action_index = 0
+        self.original_name = name
+
+    def get_action(self, game):
+        action = self.actions[self.action_index]
+        self.action_index += 1
+        return action
+
+    def name(self):
+        return f"replay-{self.original_name}"
diff --git a/deep_quoridor/src/agents/simple.py b/deep_quoridor/src/agents/simple.py
@@ -1,4 +1,4 @@
-from agents import Agent
+from agents import SelfRegisteringAgent
 
 
 def sample_random_action_sequence(game, max_path_length):
@@ -29,7 +29,7 @@ def sample_random_action_sequence(game, max_path_length):
     return action_sequence, total_reward
 
 
-class SimpleAgent(Agent):
+class SimpleAgent(SelfRegisteringAgent):
     def __init__(self, sequence_length=3, num_sequences=10):
         super().__init__()
         self.sequence_length = sequence_length
@@ -42,7 +42,9 @@ def get_action(self, game):
 
         possible_action_sequences = []
         for _ in range(self.num_sequences):
-            action_sequence, total_reward = sample_random_action_sequence(game.copy(), self.sequence_length)
+            action_sequence, total_reward = sample_random_action_sequence(
+                game.copy(), self.sequence_length
+            )
             possible_action_sequences.append((action_sequence, total_reward))
 
         # Choose the action sequence with the highest reward.
diff --git a/deep_quoridor/src/arena.py b/deep_quoridor/src/arena.py
@@ -1,6 +1,8 @@
 from typing import Optional
 from quoridor_env import env
 from agents import Agent
+from agents import AgentRegistry
+from agents.replay import ReplayAgent
 from dataclasses import dataclass
 import time
 
@@ -73,14 +75,18 @@ def __init__(
         step_rewards: bool = False,
         renderer: Optional[ArenaPlugin] = None,
         saver: Optional[ArenaPlugin] = None,
+        plugins: list[ArenaPlugin] = [],
     ):
         self.board_size = board_size
         self.max_walls = max_walls
         self.step_rewards = step_rewards
-        self.game = env(board_size=board_size, max_walls=max_walls, step_rewards=step_rewards)
+        self.game = env(
+            board_size=board_size, max_walls=max_walls, step_rewards=step_rewards
+        )
 
-        plugins = [p for p in [renderer, saver] if p is not None]
-        self.plugins = CompositeArenaPlugin(plugins)
+        self.plugins = CompositeArenaPlugin(
+            [p for p in plugins + [renderer, saver] if p is not None]
+        )
 
     def _play_game(self, agent1: Agent, agent2: Agent, game_id: str) -> GameResult:
         self.game.reset()
@@ -128,12 +134,40 @@ def play_games(self, players: list[str], times: int):
         for i in range(len(players)):
             for j in range(i + 1, len(players)):
                 for t in range(times):
-                    agent_i = Agent.create(players[i])
-                    agent_j = Agent.create(players[j])
-                    agent_1, agent_2 = (agent_i, agent_j) if t % 2 == 0 else (agent_j, agent_i)
+                    agent_i = AgentRegistry.create(players[i])
+                    agent_j = AgentRegistry.create(players[j])
+                    agent_1, agent_2 = (
+                        (agent_i, agent_j) if t % 2 == 0 else (agent_j, agent_i)
+                    )
 
                     result = self._play_game(agent_1, agent_2, f"game_{match_id:04d}")
                     results.append(result)
                     match_id += 1
 
         self.plugins.end_arena(self.game, results)
+
+    def replay_games(self, arena_data: dict, game_ids_to_replay: list[str]):
+        """Replays a series of games from previously recorded arena data.
+
+        Each game is replayed by two ReplayAgents fed the recorded moves of its two players.
+        If game_ids_to_replay is empty, every game found in arena_data is replayed.
+        """
+        self.plugins.start_arena(self.game)
+
+        results = []
+
+        if len(game_ids_to_replay) == 0:
+            game_ids_to_replay = arena_data["games"].keys()
+
+        for game_id in game_ids_to_replay:
+            game_data = arena_data["games"][game_id]
+            steps_player1 = game_data["actions"][::2]
+            steps_player2 = game_data["actions"][1::2]
+
+            agent_1 = ReplayAgent(game_data["player1"], steps_player1)
+            agent_2 = ReplayAgent(game_data["player2"], steps_player2)
+
+            result = self._play_game(agent_1, agent_2, game_id)
+            results.append(result)
+
+        self.plugins.end_arena(self.game, results)
diff --git a/deep_quoridor/src/arena_yaml_recorder.py b/deep_quoridor/src/arena_yaml_recorder.py
@@ -1,5 +1,4 @@
 from arena import ArenaPlugin, Agent, GameResult
-from typing import Optional
 import yaml
 
 
@@ -35,3 +34,11 @@ def end_arena(self, game, results: list[GameResult]):
         }
         with open(self.filename, "w") as file:
             file.write(yaml.dump(output, sort_keys=False))
+
+    @staticmethod
+    def load_recorded_arena_data(filename: str) -> dict:
+        with open(filename, "r") as file:
+            return yaml.load(file, Loader=yaml.FullLoader)
+
+
+
diff --git a/deep_quoridor/src/main.py b/deep_quoridor/src/main.py
@@ -2,27 +2,45 @@
 from arena_yaml_recorder import ArenaYAMLRecorder
 from arena import Arena
 from renderers import Renderer
-from agents import Agent
+from agents import AgentRegistry
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Deep Quoridor")
     parser.add_argument("-N", "--board_size", type=int, default=None, help="Board Size")
-    parser.add_argument("-W", "--max_walls", type=int, default=None, help="Max walls per player")
-    parser.add_argument("-r", "--renderer", choices=Renderer.names(), default="results", help="Render mode")
-    parser.add_argument("--step_rewards", action="store_true", default=False, help="Enable step rewards")
+    parser.add_argument(
+        "-W", "--max_walls", type=int, default=None, help="Max walls per player"
+    )
+    parser.add_argument(
+        "-r",
+        "--renderer",
+        choices=Renderer.names(),
+        default="results",
+        help="Render mode",
+    )
+    parser.add_argument(
+        "--step_rewards", action="store_true", default=False, help="Enable step rewards"
+    )
     parser.add_argument(
         "-p",
         "--players",
         nargs="+",
-        choices=Agent.names(),
+        choices=AgentRegistry.names(),
         default=["random", "simple"],
         help="List of players to compete against each other",
     )
     parser.add_argument(
-        "-A", "--all", action="store_true", default=False, help="Plays a tournament of all agents against each other"
+        "-A",
+        "--all",
+        action="store_true",
+        default=False,
+        help="Plays a tournament of all agents against each other",
     )
     parser.add_argument(
-        "-t", "--times", type=int, default=10, help="Number of times each player will play with each opponent"
+        "-t",
+        "--times",
+        type=int,
+        default=10,
+        help="Number of times each player will play with each opponent",
     )
     parser.add_argument(
         "--games_output_filename",
@@ -39,7 +57,7 @@
     if args.games_output_filename != "None":
         saver = ArenaYAMLRecorder(args.games_output_filename)
 
-    players = Agent.names() if args.all else args.players
+    players = AgentRegistry.names() if args.all else args.players
 
     arena_args = {
         "board_size": args.board_size,
diff --git a/deep_quoridor/src/replay_tool.py b/deep_quoridor/src/replay_tool.py
@@ -0,0 +1,83 @@
+import argparse
+import time
+from arena_yaml_recorder import ArenaYAMLRecorder
+from arena import Arena
+from arena import ArenaPlugin
+from renderers import Renderer
+
+
+"""Deep Quoridor Game Replay Tool
+
+This script allows replaying recorded Quoridor games from YAML files. It provides command-line
+options to customize the replay experience, including renderer selection and specific game filtering.
+
+Command-line Arguments:
+    -r, --renderer: Render mode for game replay visualization (default: "results")
+    -t, --time_delay: Time delay between moves in milliseconds. Only applied if > 0 (default: -1)
+    -g, --game_ids: List of specific game IDs to replay. If not set, replays all games
+    -f, --games_input_filename: Path to YAML file containing recorded games (default: "game_recording.yaml")
+
+Example Usage:
+    python replay_tool.py -r text -g game_0008 game_0009 -f my_games.yaml
+    python replay_tool.py -r text -t 500 -f my_games.yaml  # Replay with 500ms delay between moves
+"""
+
+
+class ActionDelayPlugin(ArenaPlugin):
+    def __init__(self, time_delay: int):
+        self.time_delay = time_delay
+
+    def action(self, game, step, agent, action):
+        time.sleep(self.time_delay / 1000)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Deep Quoridor replay tool")
+    parser.add_argument(
+        "-r",
+        "--renderer",
+        choices=Renderer.names(),
+        default="results",
+        help="Render mode",
+    )
+    parser.add_argument(
+        "-t",
+        "--time_delay",
+        type=int,
+        default=-1,
+        help="Time delay between moves in ms, > 0 or ignored (default: -1)",
+    )
+    parser.add_argument(
+        "-g",
+        "--game_ids",
+        nargs="+",
+        type=str,
+        default=[],
+        help="Game IDs to replay, if not set it will replay all games",
+    )
+    parser.add_argument(
+        "-f",
+        "--games_input_filename",
+        type=str,
+        default="game_recording.yaml",
+        help="Load the played games from the file",
+    )
+
+    args = parser.parse_args()
+
+    renderer = Renderer.create(args.renderer)
+
+    arena_data = ArenaYAMLRecorder.load_recorded_arena_data(args.games_input_filename)
+
+    arena_args = {
+        "board_size": arena_data["config"]["board_size"],
+        "max_walls": arena_data["config"]["max_walls"],
+        "step_rewards": arena_data["config"]["step_rewards"],
+        "renderer": renderer,
+        "plugins": [ActionDelayPlugin(args.time_delay)] if args.time_delay > 0 else [],
+    }
+
+    arena_args = {k: v for k, v in arena_args.items() if v is not None}
+    arena = Arena(**arena_args)
+
+    arena.replay_games(arena_data, args.game_ids)
diff --git a/game_recording.yaml b/game_recording.yaml