Skip to content

Commit 8de7737

Browse files
Merge pull request #309 from jonbinney/mcts0
Raw NN play and dumb_score_raw metric
2 parents d41e908 + a5488cc commit 8de7737

File tree

5 files changed

+50
-11
lines changed

5 files changed

+50
-11
lines changed

deep_quoridor/src/agents/alphazero/mcts.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,19 @@ def search_batch(self, initial_games: list[Quoridor]):
155155
max_iterations = max(num_iterations)
156156

157157
roots = [Node(g, ucb_c=self.ucb_c) for g in initial_games]
158+
159+
# When n is 0, it plays just with the NN and doesn't actually perform MCTS.
160+
# For this, we just set the visit counts to a value proportional to the prior
161+
if self.n == 0:
162+
value_batch, priors_batch = self.evaluator.evaluate_batch([node.game for node in roots])
163+
for root, value, priors in zip(roots, value_batch, priors_batch):
164+
root.expand(priors)
165+
root.backpropagate(-value)
166+
for ch in root.children:
167+
ch.visit_count = int(ch.prior * 1000)
168+
169+
return [root.children for root in roots], [-(root.value_sum / root.visit_count) for root in roots]
170+
158171
for iteration in range(max_iterations):
159172
need_evaluation = [] # (root, node)
160173
for game_idx, root in enumerate(roots):

deep_quoridor/src/metrics.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from agents import Agent
2+
from agents.alphazero.alphazero import AlphaZeroAgent
23
from agents.core.agent import AgentRegistry
34
from arena import Arena, PlayMode
45
from arena_utils import GameResult
56
from quoridor_env import env
67
from renderers.match_results import MatchResultsRenderer
78
from utils.misc import compute_elo, get_opponent_player_id
9+
from utils.subargs import override_subargs
810

911

1012
class Metrics:
@@ -78,7 +80,7 @@ def _compute_relative_elo(self, elo_table: dict[str, float], agent_name: str) ->
7880

7981
def compute(
8082
self, agent_encoded_name: str
81-
) -> tuple[int, dict[str, float], int, float, dict[str, float], dict[str, float], int, int]:
83+
) -> tuple[int, dict[str, float], int, float, dict[str, float], dict[str, float], int, int, int]:
8284
"""
8385
Evaluates the performance of a given agent by running it against a set of predefined opponents and computing its Elo rating and win percentage.
8486
@@ -95,6 +97,7 @@ def compute(
9597
- p2_win_percentages (dict[str, float]): Win percentage as player two against each opponent.
9698
- absolute_elo (int): ELO rating obtained during the tournament
9799
- dumb_score (int): A score between 0 (perfect) and 100 (always wrong) on how the agent performs in certain basic situations
100+
- dumb_score_raw (int): For AlphaZeroAgent, same as dumb_score but using the raw network rather than MCTS. For other agents, returns dumb_score.
98101
99102
Notes:
100103
- The method disables training mode for trainable agents during evaluation and restores it afterward.
@@ -142,6 +145,11 @@ def compute(
142145

143146
dumb_score = self.dumb_score(agent)
144147

148+
if isinstance(agent, AlphaZeroAgent):
149+
raw_play_encoded_name = override_subargs(play_encoded_name, {"mcts_n": 0})
150+
agent_raw = AgentRegistry.create_from_encoded_name(raw_play_encoded_name, arena.game)
151+
dumb_score_raw = self.dumb_score(agent_raw, verbose=True)
152+
145153
return (
146154
VERSION,
147155
elo_table,
@@ -151,6 +159,7 @@ def compute(
151159
p2_win_percentages,
152160
int(absolute_elo),
153161
dumb_score,
162+
dumb_score_raw,
154163
)
155164

156165
def dumb_score(self, agent: Agent, verbose: bool = False):

deep_quoridor/src/play.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ def player_with_params(arg):
6868
"-mx",
6969
"--max_steps",
7070
type=int,
71-
default=10000,
72-
help="Maximum number of steps per game. Default is 10000",
71+
default=200,
72+
help="Maximum number of steps per game. Default is 200",
7373
)
7474
parser.add_argument(
7575
"--profile",

deep_quoridor/src/plugins/wandb_train.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,17 @@ def compute_tournament_metrics(self, model_filename: str) -> int:
159159
agent_encoded_name = override_subargs(self.agent_encoded_name, override_args)
160160

161161
Timer.start("benchmark")
162-
_, _, relative_elo, win_perc, p1_win_percentages, p2_win_percentages, absolute_elo, dumb_score = (
163-
self.metrics.compute(agent_encoded_name)
164-
)
162+
(
163+
_,
164+
_,
165+
relative_elo,
166+
win_perc,
167+
p1_win_percentages,
168+
p2_win_percentages,
169+
absolute_elo,
170+
dumb_score,
171+
dumb_score_raw,
172+
) = self.metrics.compute(agent_encoded_name)
165173
Timer.finish("benchmark", self.episode_count)
166174

167175
print(f"Tournament Metrics - Relative elo: {relative_elo}, win percentage: {win_perc}")
@@ -176,6 +184,7 @@ def compute_tournament_metrics(self, model_filename: str) -> int:
176184
"win_perc": win_perc,
177185
"absolute_elo": absolute_elo,
178186
"dumb_score": dumb_score,
187+
"dumb_score_raw": dumb_score_raw,
179188
"Episode": self.episode_count, # x axis
180189
}
181190

deep_quoridor/src/run_metrics.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,22 @@ def player_with_params(arg):
4949
args = parser.parse_args()
5050
m = Metrics(args.board_size, args.max_walls, args.benchmarks, args.benchmarks_t, args.max_steps, args.num_workers)
5151
table = PrettyTable()
52-
table.field_names = ["Agent", "Elo", "Relative Elo", "Win %", "Dumb Score"]
52+
table.field_names = ["Agent", "Elo", "Relative Elo", "Win %", "Dumb Score", "Raw Dumb Score"]
5353

5454
for player in args.players:
5555
player_nick = AgentRegistry.nick_from_encoded_name(player)
5656
print(f"=== Computing metrics for {player_nick} ===")
57-
_, _, relative_elo, win_perc, p1_win_percentages, p2_win_percentages, absolute_elo, dumb_score = m.compute(
58-
player
59-
)
60-
table.add_row([player_nick, absolute_elo, relative_elo, f"{win_perc:.2f}", dumb_score])
57+
(
58+
_,
59+
_,
60+
relative_elo,
61+
win_perc,
62+
p1_win_percentages,
63+
p2_win_percentages,
64+
absolute_elo,
65+
dumb_score,
66+
dumb_score_raw,
67+
) = m.compute(player)
68+
table.add_row([player_nick, absolute_elo, relative_elo, f"{win_perc:.2f}", dumb_score, dumb_score_raw])
6169

6270
print(table)

0 commit comments

Comments
 (0)