Commit 85618b9 (v0.2.0)
1 parent 3e8c2bb commit 85618b9

File tree: 10 files changed, +53 −14 lines changed

CHANGELOG.md

Lines changed: 38 additions & 0 deletions
@@ -1,3 +1,41 @@
+2025.04.01 (v0.2.0)
+- env: Add Metadrive environment and configurations (#192)
+- env: Add Sampled MuZero/UniZero and DMC environment with related configurations (#260)
+- env: Polish Chess environment and its render method; add unittests and configurations (#272)
+- env: Add Jericho environment and its configurations (#307)
+- algo: Add Harmony Dream loss balance in MuZero (#242)
+- algo: Adopt AlphaZero for non-zero-sum games (#245)
+- algo: Add AlphaZero CTree unittest (#306)
+- algo: Add recent MCTS-related papers (#324)
+- algo: Introduce RoPE to use the true timestep index as pos_index (#266)
+- algo: Add Jericho DDP configuration (#337)
+- feat: Add LightZero Sphinx documentation (#237)
+- feat: Add Wandb support (#294)
+- feat: Add Atari100k metric utilities (#295)
+- feat: Add eval_benchmark tests (#296)
+- feat: Add save_replay and collect_episode_data options in Jericho (#333)
+- feat: Add an MCTS TicTacToe demo in one single file (#315)
+- fix: Fix DownSample for different observation shapes (#254)
+- fix: Fix wrong chance values in Stochastic MuZero (#275)
+- fix: Use display_frames_as_gif in CartPole (#288)
+- fix: Fix chance encoder in stochastic_muzero_model_mlp.py (#284)
+- fix: Correct typo in model/utils.py (#290)
+- fix: Fix SMZ compile_args and num_simulations bug in world_model (#297)
+- fix: Fix reward type bug in 2048 and OS import issue in CartPole (#304)
+- fix: Switch to macos-13 in action (#319)
+- fix: Fix SMZ & SEZ config for pixel-based DMC (#322)
+- fix: Fix update_per_collect in DDP setting (#321)
+- fix: Fix obs_shape tuple bug in initialize_zeros_batch (#327)
+- fix: Fix prepare_obs_stack_for_unizero (#328)
+- fix: Fix random_policy when len(ready_env_id) < collector_env_num (#335)
+- fix: Fix timestep compatibility (#339)
+- polish: Polish efficiency and performance on Atari and DMC (#292)
+- polish: Update requirements (#298)
+- polish: Optimize reward/value/policy_head_hidden_channels (#314)
+- polish: Update tutorial configuration and log instructions (#330)
+- ci: Add self-hosted Linux (Ubuntu) CI runner (#259)
+- test: Add self-hosted Linux runner for CI tests (#323)
+
 2024.07.12 (v0.1.0)
 - env: SumToThree env from pooltool (#227)
 - algo: UniZero (#232)

README.md

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
 [![GitHub license](https://img.shields.io/github/license/opendilab/LightZero)](https://github.com/opendilab/LightZero/blob/master/LICENSE)
 [![discord badge](https://dcbadge.vercel.app/api/server/dkZS2JF56X?style=flat)](https://discord.gg/dkZS2JF56X)

-Updated on 2025.02.08 LightZero-v0.1.0
+Updated on 2025.04.01 LightZero-v0.2.0

 English | [简体中文(Simplified Chinese)](https://github.com/opendilab/LightZero/blob/main/README.zh.md) | [Documentation](https://opendilab.github.io/LightZero) | [LightZero Paper](https://arxiv.org/abs/2310.08348) | [🔥UniZero Paper](https://arxiv.org/abs/2406.10667) | [🔥ReZero Paper](https://arxiv.org/abs/2404.16364)

README.zh.md

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 [![Contributors](https://img.shields.io/github/contributors/opendilab/LightZero)](https://github.com/opendilab/LightZero/graphs/contributors)
 [![GitHub license](https://img.shields.io/github/license/opendilab/LightZero)](https://github.com/opendilab/LightZero/blob/master/LICENSE)

-Last updated on 2025.02.08 LightZero-v0.1.0
+Last updated on 2025.04.01 LightZero-v0.2.0

 [English](https://github.com/opendilab/LightZero/blob/main/README.md) | Simplified Chinese | [Documentation](https://opendilab.github.io/LightZero) | [LightZero Paper](https://arxiv.org/abs/2310.08348) | [🔥UniZero Paper](https://arxiv.org/abs/2406.10667) | [🔥ReZero Paper](https://arxiv.org/abs/2404.16364)

lzero/mcts/buffer/game_segment.py

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ def append(
         obs: np.ndarray,
         reward: np.ndarray,
         action_mask: np.ndarray = None,
-        to_play: List = [-1],
+        to_play: Union[int, List] = -1,
         timestep: int = 0,
         chance: int = 0,
 ) -> None:
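The change above replaces a mutable default argument (`to_play: List = [-1]`) with an immutable scalar default. Beyond the type widening, this sidesteps a classic Python pitfall: a default list is created once at function-definition time, so in-place mutations leak across calls. A minimal sketch (function names here are illustrative, not from LightZero):

```python
def append_bad(item, to_play=[-1]):
    # The default list is created once when the function is defined,
    # so mutations persist across calls that rely on the default.
    to_play.append(item)
    return to_play

def append_good(item, to_play=None):
    # An immutable sentinel default avoids shared state between calls.
    if to_play is None:
        to_play = [-1]
    to_play.append(item)
    return to_play

print(append_bad(1))   # [-1, 1]
print(append_bad(2))   # [-1, 1, 2]  <- surprising carry-over from the first call
print(append_good(1))  # [-1, 1]
print(append_good(2))  # [-1, 2]
```

An immutable default like `-1` (as in the diff) is safe for the same reason `None` is: it can never be mutated in place.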

lzero/mcts/ptree/ptree_ez.py

Lines changed: 2 additions & 2 deletions
@@ -239,7 +239,7 @@ def prepare(
         noises: List[float],
         value_prefixs: List[float],
         policies: List[List[float]],
-        to_play: List = [-1]
+        to_play: Union[int, List] = -1
     ) -> None:
         """
         Overview:
@@ -261,7 +261,7 @@ def prepare(
             self.roots[i].add_exploration_noise(root_noise_weight, noises[i])
             self.roots[i].visit_count += 1

-    def prepare_no_noise(self, value_prefixs: List[float], policies: List[List[float]], to_play: List = [-1]) -> None:
+    def prepare_no_noise(self, value_prefixs: List[float], policies: List[List[float]], to_play: Union[int, List] = -1) -> None:
         """
         Overview:
             Expand the roots without noise.

lzero/mcts/ptree/ptree_mz.py

Lines changed: 2 additions & 2 deletions
@@ -220,7 +220,7 @@ def prepare(
         noises: List[float],
         rewards: List[float],
         policies: List[List[float]],
-        to_play: List = [-1]
+        to_play: Union[int, List] = -1
     ) -> None:
         """
         Overview:
@@ -241,7 +241,7 @@ def prepare(
             self.roots[i].add_exploration_noise(root_noise_weight, noises[i])
             self.roots[i].visit_count += 1

-    def prepare_no_noise(self, rewards: List[float], policies: List[List[float]], to_play: List = [-1]) -> None:
+    def prepare_no_noise(self, rewards: List[float], policies: List[List[float]], to_play: Union[int, List] = -1) -> None:
         """
         Overview:
             Expand the roots without noise.

lzero/mcts/ptree/ptree_sez.py

Lines changed: 2 additions & 2 deletions
@@ -374,7 +374,7 @@ def prepare(
         noises: List[float],
         value_prefixs: List[float],
         policies: List[List[float]],
-        to_play: List = [-1]
+        to_play: Union[int, List] = -1
     ) -> None:
         """
         Overview:
@@ -396,7 +396,7 @@ def prepare(

             self.roots[i].visit_count += 1

-    def prepare_no_noise(self, value_prefixs: List[float], policies: List[List[float]], to_play: List = [-1]) -> None:
+    def prepare_no_noise(self, value_prefixs: List[float], policies: List[List[float]], to_play: Union[int, List] = -1) -> None:
         """
         Overview:
             Expand the roots without noise.

lzero/mcts/ptree/ptree_stochastic_mz.py

Lines changed: 2 additions & 2 deletions
@@ -246,7 +246,7 @@ def prepare(
         noises: List[float],
         rewards: List[float],
         policies: List[List[float]],
-        to_play: List = [-1]
+        to_play: Union[int, List] = -1
     ) -> None:
         """
         Overview:
@@ -269,7 +269,7 @@ def prepare(
             self.roots[i].add_exploration_noise(root_noise_weight, noises[i])
             self.roots[i].visit_count += 1

-    def prepare_no_noise(self, rewards: List[float], policies: List[List[float]], to_play: List = [-1]) -> None:
+    def prepare_no_noise(self, rewards: List[float], policies: List[List[float]], to_play: Union[int, List] = -1) -> None:
         """
         Overview:
             Expand the roots without noise.
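Across the four ptree files, the signature change means `to_play` may now arrive as either a single player index or a per-root list. A callee that accepts `Union[int, List]` typically normalizes the scalar case up front; a minimal sketch of that pattern (the helper name and broadcast rule are assumptions for illustration, not LightZero's actual code):

```python
from typing import List, Union

def normalize_to_play(to_play: Union[int, List], num_roots: int) -> List[int]:
    # Hypothetical normalization: broadcast a scalar player index to one
    # entry per root, so downstream loops can index to_play[i] uniformly.
    if isinstance(to_play, int):
        return [to_play] * num_roots
    return list(to_play)

print(normalize_to_play(-1, 3))      # [-1, -1, -1]
print(normalize_to_play([0, 1], 2))  # [0, 1]
```

With a default of `-1` (the "no player / single-player" convention used in the diffs), callers in single-agent environments can omit the argument entirely.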

lzero/policy/efficientzero.py

Lines changed: 1 addition & 1 deletion
@@ -666,7 +666,7 @@ def _init_eval(self) -> None:
         else:
             self._mcts_eval = MCTSPtree(self._cfg)

-    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: List = [-1], ready_env_id: np.array = None, **kwargs):
+    def _forward_eval(self, data: torch.Tensor, action_mask: list, to_play: Union[int, List] = [-1], ready_env_id: np.array = None, **kwargs):
         """
         Overview:
             The forward function for evaluating the current policy in eval mode. Use model to execute MCTS search.

zoo/jericho/envs/jericho_env.py

Lines changed: 3 additions & 2 deletions
@@ -466,11 +466,12 @@ def collect_episode_data(self):
 if __name__ == '__main__':
     from easydict import EasyDict

+    env_type = 'detective'  # zork1, acorncourt, detective, omniquest
     # Configuration dictionary for the environment.
     env_cfg = EasyDict(
         dict(
             max_steps=400,
-            game_path="./zoo/jericho/envs/z-machine-games-master/jericho-game-suite/" + "zork1.z5",
+            game_path="./zoo/jericho/envs/z-machine-games-master/jericho-game-suite/" + f"{env_type}.z5",
             max_action_num=10,
             tokenizer_path="google-bert/bert-base-uncased",
             max_seq_len=512,
@@ -481,7 +482,7 @@ def collect_episode_data(self):
             evaluator_env_num=1,
             save_replay=True,
             save_replay_path=None,
-            env_type='zork1',  # zork1, acorncourt, detective, omniquest
+            env_type=env_type,
             collect_policy_mode='expert'  # random, human, expert
         )
     )
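The jericho_env.py change above derives `game_path` and `env_type` from a single variable so the two fields cannot drift apart. The same idea can be factored into a small builder; `make_env_cfg` is a hypothetical helper sketching the pattern (plain dict used here to stay dependency-free, where the original uses EasyDict):

```python
def make_env_cfg(env_type: str) -> dict:
    # Derive every env_type-dependent field from one argument, mirroring
    # the single-source-of-truth pattern introduced in the diff.
    suite_dir = "./zoo/jericho/envs/z-machine-games-master/jericho-game-suite/"
    return {
        "max_steps": 400,
        "game_path": suite_dir + f"{env_type}.z5",
        "env_type": env_type,
    }

cfg = make_env_cfg("detective")
print(cfg["game_path"])  # ./zoo/.../jericho-game-suite/detective.z5 (detective.z5 suffix)
```

Switching games then means changing one string, with no risk of a `game_path` that points at a different game than `env_type` claims.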
