polish(pu): polish chess config

puyuan1996 · puyuan1996 · commit 6649e851c9ca · 2025-05-23T19:06:47.000+08:00
diff --git a/zoo/board_games/chess/config/chess_alphazero_bot_mode_config.py b/zoo/board_games/chess/config/chess_alphazero_bot_mode_config.py
@@ -3,24 +3,24 @@
 # ==============================================================
 # begin of the most frequently changed config specified by the user
 # ==============================================================
-# collector_env_num = 8
-# n_episode = 8
-# evaluator_env_num = 5
-# num_simulations = 400
-# update_per_collect = 200
-# batch_size = 512
-# max_env_step = int(1e6)
-# mcts_ctree = False
+collector_env_num = 8
+n_episode = 8
+evaluator_env_num = 5
+num_simulations = 400
+update_per_collect = 200
+batch_size = 512
+max_env_step = int(1e6)
+mcts_ctree = False
 
 # TODO: for debug
-collector_env_num = 2
-n_episode = 2
-evaluator_env_num = 2
-num_simulations = 4
-update_per_collect = 2
-batch_size = 2
-max_env_step = int(1e4)
-mcts_ctree = False
+# collector_env_num = 2
+# n_episode = 2
+# evaluator_env_num = 2
+# num_simulations = 4
+# update_per_collect = 2
+# batch_size = 2
+# max_env_step = int(1e4)
+# mcts_ctree = False
 # ==============================================================
 # end of the most frequently changed config specified by the user
 # ==============================================================
@@ -56,7 +56,7 @@
         model=dict(
             observation_shape=(8, 8, 20),
             action_space_size=int(8 * 8 * 73),
-            # TODO: for debug
+            # TODO: only for debug
             num_res_blocks=1,
             num_channels=1,
             value_head_hidden_channels=[16],
diff --git a/zoo/board_games/chess/config/chess_alphazero_sp_mode_config.py b/zoo/board_games/chess/config/chess_alphazero_sp_mode_config.py
@@ -10,24 +10,22 @@
 update_per_collect = 200
 batch_size = 512
 max_env_step = int(1e6)
-mcts_ctree = True
-# mcts_ctree = False
-
+mcts_ctree = False
 
 # TODO: for debug
-collector_env_num = 2
-n_episode = 2
-evaluator_env_num = 2
-num_simulations = 4
-update_per_collect = 2
-batch_size = 2
-max_env_step = int(1e4)
+# collector_env_num = 2
+# n_episode = 2
+# evaluator_env_num = 2
+# num_simulations = 2
+# update_per_collect = 1
+# batch_size = 2
+# max_env_step = int(1e4)
 # mcts_ctree = False
 # ==============================================================
 # end of the most frequently changed config specified by the user
 # ==============================================================
 chess_alphazero_config = dict(
-    exp_name='data_az_ctree/chess_sp-mode_alphazero_seed0',
+    exp_name='data_az_ptree/chess_sp-mode_alphazero_seed0',
     env=dict(
         board_size=8,
         battle_mode='self_play_mode',
@@ -58,14 +56,14 @@
             observation_shape=(8, 8, 20),
             action_space_size=int(8 * 8 * 73),
             # TODO: for debug
-            num_res_blocks=1,
-            num_channels=1,
-            value_head_hidden_channels=[16],
-            policy_head_hidden_channels=[16],
-            # num_res_blocks=8,
-            # num_channels=256,
-            # value_head_hidden_channels=[256, 256],
-            # policy_head_hidden_channels=[256, 256],
+            # num_res_blocks=1,
+            # num_channels=1,
+            # value_head_hidden_channels=[16],
+            # policy_head_hidden_channels=[16],
+            num_res_blocks=8,
+            num_channels=256,
+            value_head_hidden_channels=[256, 256],
+            policy_head_hidden_channels=[256, 256],
         ),
         cuda=True,
         board_size=8,
diff --git a/zoo/board_games/chess/envs/chess_lightzero_env.py b/zoo/board_games/chess/envs/chess_lightzero_env.py
@@ -10,9 +10,8 @@
 from ding.envs.env.base_env import BaseEnvTimestep
 from ding.utils.registry_factory import ENV_REGISTRY
 from gymnasium import spaces
-from pettingzoo.classic.chess import chess_utils
-
 from zoo.board_games.chess.envs.chess_env import ChessEnv
+from pettingzoo.classic.chess import chess_utils as pz_cu
 
 
 @ENV_REGISTRY.register('chess_lightzero')
@@ -50,16 +49,15 @@ def __init__(self, cfg=None):
 
     @property
     def legal_actions(self):
-        return chess_utils.legal_moves(self.board)
+        return pz_cu.legal_moves(self.board)
 
     def observe(self, agent_index):
         try:
-            observation = chess_utils.get_observation(self.board, agent_index).astype(float)  # TODO
+            observation = pz_cu.get_observation(self.board, agent_index).astype(float)  # TODO
         except Exception as e:
-            print('debug')
+            print(f'debug: {e}')
             print(f"self.board:{self.board}")
 
-
         # TODO:
         # observation = np.dstack((observation[:, :, :7], self.board_history))
         # We need to swap the white 6 channels with black 6 channels
@@ -75,9 +73,12 @@ def observe(self, agent_index):
         #         observation[..., 13 * i : 13 * i + 6] = tmp
 
         action_mask = np.zeros(4672, dtype=np.int8)
-        action_mask[chess_utils.legal_moves(self.board)] = 1
+        action_mask[pz_cu.legal_moves(self.board)] = 1
         return {'observation': observation, 'action_mask': action_mask}
 
+
+
+
     def current_state(self):
         """
         Overview:
@@ -103,7 +104,7 @@ def get_done_winner(self):
         if result == "*":
             winner = -1
         else:
-            winner = chess_utils.result_to_int(result)
+            winner = pz_cu.result_to_int(result)
 
         if not done:
             winner = -1
@@ -143,7 +144,7 @@ def reset(self, start_player_index=0, init_state=None, katago_policy_init=False,
             self.board = chess.Board()
 
         action_mask = np.zeros(4672, dtype=np.int8)
-        action_mask[chess_utils.legal_moves(self.board)] = 1
+        action_mask[pz_cu.legal_moves(self.board)] = 1
         # self.board_history = np.zeros((8, 8, 104), dtype=bool)
 
         if self.battle_mode == 'play_with_bot_mode' or self.battle_mode == 'eval_mode':
@@ -265,10 +266,10 @@ def _player_step(self, action):
         current_agent = self.current_player_index
 
         # TODO: Update board history
-        # next_board = chess_utils.get_observation(self.board, current_agent)
+        # next_board = pz_cu.get_observation(self.board, current_agent)
         # self.board_history = np.dstack((next_board[:, :, 7:], self.board_history[:, :, :-13]))
 
-        chosen_move = chess_utils.action_to_move(self.board, action, current_agent)
+        chosen_move = pz_cu.action_to_move(self.board, action, current_agent)
         assert chosen_move in self.board.legal_moves
         self.board.push(chosen_move)
 
@@ -277,7 +278,7 @@ def _player_step(self, action):
         if result == "*":
             reward = 0.
         else:
-            reward = chess_utils.result_to_int(result)
+            reward = pz_cu.result_to_int(result)
 
         if self.current_player == 1:
             reward = -reward
@@ -287,7 +288,7 @@ def _player_step(self, action):
             info['eval_episode_return'] = reward
 
         action_mask = np.zeros(4672, dtype=np.int8)
-        action_mask[chess_utils.legal_moves(self.board)] = 1
+        action_mask[pz_cu.legal_moves(self.board)] = 1
 
         obs = {
             'observation': self.observe(self.current_player_index)['observation'],
@@ -318,14 +319,14 @@ def current_player(self, value):
         self._current_player = value
 
     def random_action(self):
-        action_list = chess_utils.legal_moves(self.board)
+        action_list = pz_cu.legal_moves(self.board)
         return np.random.choice(action_list)
 
     def simulate_action(self, action):
-        if action not in chess_utils.legal_moves(self.board):
+        if action not in pz_cu.legal_moves(self.board):
             raise ValueError("action {0} on board {1} is not legal".format(action, self.board.fen()))
         new_board = copy.deepcopy(self.board)
-        new_board.push(chess_utils.action_to_move(self.board, action, self.current_player_index))
+        new_board.push(pz_cu.action_to_move(self.board, action, self.current_player_index))
         if self.start_player_index == 0:
             start_player_index = 1
         else: