Skip to content

Commit ab746d1

Browse files
committed
fix(pu): fix some merge typo
1 parent e7a8796 commit ab746d1

File tree

12 files changed

+48
-43
lines changed

12 files changed

+48
-43
lines changed

lzero/entry/train_muzero_multitask_segment_ddp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import torch
99
import torch.distributed as dist
1010
from ding.config import compile_config
11-
from ding.envs import IEnvManager, create_env_manager, get_vec_env_setting
11+
from ding.envs import create_env_manager, get_vec_env_setting
1212
from ding.policy import Policy, create_policy
1313
from ding.rl_utils import get_epsilon_greedy_fn
1414
from ding.utils import EasyTimer, set_pkg_seed, get_rank, get_world_size

lzero/entry/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def _is_lora_param(name: str) -> bool:
121121
return bool(_LORA_PAT.search(name))
122122

123123

124-
def freeze_non_lora(
124+
def freeze_non_lora_parameters(
125125
module: nn.Module,
126126
freeze: bool = True,
127127
*,

lzero/model/common.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -623,9 +623,6 @@ def __init__(
623623
self.norm_before_last_linear = nn.LayerNorm([num_channels, spatial_size, spatial_size], eps=1e-5)
624624
self.last_linear = nn.Linear(linear_in_dim, embedding_dim, bias=False)
625625

626-
elif self.observation_shape[1] in [84, 96]:
627-
self.last_linear = nn.Linear(64 * 6 * 6, self.embedding_dim, bias=False)
628-
629626
self.final_norm_option_in_encoder = final_norm_option_in_encoder
630627
if self.final_norm_option_in_encoder == 'LayerNorm':
631628
self.final_norm = nn.LayerNorm(self.embedding_dim, eps=1e-5)

lzero/model/unizero_model_multitask.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def _init_vector_components(self, world_model_cfg: EasyDict, obs_act_embed_dim:
106106
self.decoder_network = VectorDecoderForMemoryEnv(embedding_dim=world_model_cfg.embed_dim, output_shape=25)
107107
self.tokenizer = Tokenizer(
108108
encoder=self.representation_network,
109-
decoder_network=self.decoder_network,
109+
decoder=self.decoder_network,
110110
with_lpips=False,
111111
obs_type=world_model_cfg.obs_type
112112
)
@@ -162,7 +162,7 @@ def _init_image_components(self, world_model_cfg: EasyDict, observation_shape: S
162162
self.decoder_network = None
163163
self.tokenizer = Tokenizer(
164164
encoder=self.representation_network,
165-
decoder_network=self.decoder_network,
165+
decoder=self.decoder_network,
166166
with_lpips=False,
167167
obs_type=world_model_cfg.obs_type
168168
)
@@ -192,7 +192,7 @@ def _init_image_memory_components(self, world_model_cfg: EasyDict) -> None:
192192
)
193193
self.tokenizer = Tokenizer(
194194
encoder=self.representation_network,
195-
decoder_network=self.decoder_network,
195+
decoder=self.decoder_network,
196196
with_lpips=True,
197197
obs_type=world_model_cfg.obs_type
198198
)

lzero/model/unizero_world_models/tokenizer.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,12 @@ def encode_to_obs_embeddings(self, x: torch.Tensor, task_id: int = 0) -> torch.T
115115
# This handles both single-task (a single nn.Module) and multi-task (an nn.ModuleList) scenarios.
116116
if isinstance(self.encoder, nn.ModuleList):
117117
if not 0 <= task_id < len(self.encoder):
118-
raise ValueError(
119-
f"Provided task_id {task_id} is invalid for the encoder list of size {len(self.encoder)}."
120-
)
121-
encoder_module = self.encoder[task_id]
118+
# raise ValueError(
119+
# f"Provided task_id {task_id} is invalid for the encoder list of size {len(self.encoder)}."
120+
# )
121+
encoder_module = self.encoder
122+
else:
123+
encoder_module = self.encoder[task_id]
122124
else:
123125
encoder_module = self.encoder
124126

lzero/model/unizero_world_models/world_model.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from torch.distributions import Categorical, Independent, Normal, TransformedDistribution, TanhTransform
1010

1111
from lzero.model.common import SimNorm
12-
from lzero.model.utils import cal_dormant_ratio, compute_average_weight_magnitude, cal_effective_rank
12+
from lzero.model.utils import calculate_dormant_ratio, compute_average_weight_magnitude, compute_effective_rank
1313
from .kv_caching import KeysValues
1414
from .slicer import Head, PolicyHeadCont
1515
from .tokenizer import Tokenizer
@@ -45,6 +45,7 @@ def __init__(self, config: TransformerConfig, tokenizer) -> None:
4545

4646
self.transformer = Transformer(self.config)
4747
self.task_num = 1
48+
self.env_num = self.config.env_num
4849
if self.config.device == 'cpu':
4950
self.device = torch.device('cpu')
5051
else:
@@ -70,7 +71,10 @@ def __init__(self, config: TransformerConfig, tokenizer) -> None:
7071
print(f"self.pos_emb.weight.device: {self.pos_emb.weight.device}")
7172

7273
self.register_token_num = config.register_token_num if hasattr(config, "register_token_num") else 4
73-
74+
if self.task_embed_option == "concat_task_embed":
75+
self.obs_per_embdding_dim = self.config.embed_dim - self.task_embed_dim
76+
else:
77+
self.obs_per_embdding_dim = self.config.embed_dim
7478
self.continuous_action_space = self.config.continuous_action_space
7579

7680
# Initialize action embedding table
@@ -1352,7 +1356,7 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
13521356
# E.g., (32, 5, 3, 64, 64) -> (160, 3, 64, 64)
13531357
inputs = batch['observations'].contiguous().view(-1, *shape[-3:])
13541358

1355-
dormant_ratio_encoder_dict = cal_dormant_ratio(
1359+
dormant_ratio_encoder_dict = calculate_dormant_ratio(
13561360
self.tokenizer.encoder, inputs.detach(), dormant_threshold=self.dormant_threshold
13571361
)
13581362
dormant_ratio_encoder = dormant_ratio_encoder_dict['global']
@@ -1370,11 +1374,11 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
13701374
# The 'representation_layer_name' argument specifies the target layer within the model's named modules.
13711375

13721376
# Effective rank for the final linear layer of the encoder.
1373-
e_rank_last_linear = cal_effective_rank(
1377+
e_rank_last_linear = compute_effective_rank(
13741378
self.tokenizer.encoder, inputs, representation_layer_name="last_linear"
13751379
)
13761380
# Effective rank for the SimNorm layer of the encoder.
1377-
e_rank_sim_norm = cal_effective_rank(
1381+
e_rank_sim_norm = compute_effective_rank(
13781382
self.tokenizer.encoder, inputs, representation_layer_name="sim_norm"
13791383
)
13801384

@@ -1485,7 +1489,7 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
14851489
# ========= logging for analysis =========
14861490
if self.analysis_dormant_ratio_weight_rank:
14871491
# Calculate dormant ratio of the world model
1488-
dormant_ratio_world_model = cal_dormant_ratio(self, {
1492+
dormant_ratio_world_model = calculate_dormant_ratio(self, {
14891493
'obs_embeddings_and_act_tokens': (obs_embeddings.detach(), act_tokens.detach())},
14901494
dormant_threshold=self.dormant_threshold)
14911495
dormant_ratio_transformer = dormant_ratio_world_model['transformer']

lzero/model/unizero_world_models/world_model_multitask.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
from lzero.model.common import SimNorm
2020
from lzero.model.unizero_world_models.world_model import WorldModel
2121
from lzero.model.utils import (
22-
cal_dormant_ratio,
23-
cal_effective_rank,
22+
calculate_dormant_ratio,
23+
compute_effective_rank,
2424
compute_average_weight_magnitude,
2525
)
2626

@@ -224,7 +224,7 @@ def __init__(self, config: TransformerConfig, tokenizer: Tokenizer) -> None:
224224

225225
# Apply weight initialization. The order of initialization is important.
226226
self.apply(lambda module: init_weights(module, norm_type=self.config.norm_type))
227-
self._initialize_last_layer()
227+
self._initialize_last_layer_mt()
228228

229229
# --- Cache and State Initialization ---
230230
self._initialize_cache_structures()
@@ -415,7 +415,7 @@ def create_head_modules_softmoe(self) -> None:
415415
self.head_policy = self._create_head_softmoe(self.value_policy_tokens_pattern, self.action_space_size, soft_moe=self.get_soft_moe("policy_soft_moe"))
416416
self.head_value = self._create_head_softmoe(self.value_policy_tokens_pattern, self.support_size, soft_moe=self.get_soft_moe("value_soft_moe"))
417417

418-
def _initialize_last_layer(self) -> None:
418+
def _initialize_last_layer_mt(self) -> None:
419419
"""Initializes the last linear layer of prediction heads to zero for training stability."""
420420
last_linear_layer_init_zero = True
421421
print(f'world_model_mt.py:self.task_num:{self.task_num}')
@@ -1555,7 +1555,7 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
15551555
encoder_index = task_id
15561556
else:
15571557
encoder_index = 0
1558-
dormant_ratio_encoder_dict = cal_dormant_ratio(self.tokenizer.encoder[encoder_index], inputs.detach(),
1558+
dormant_ratio_encoder_dict = calculate_dormant_ratio(self.tokenizer.encoder[encoder_index], inputs.detach(),
15591559
dormant_threshold=self.dormant_threshold)
15601560

15611561
dormant_ratio_encoder = dormant_ratio_encoder_dict['global']
@@ -1564,9 +1564,9 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
15641564
avg_weight_mag_transformer = compute_average_weight_magnitude(self.transformer)
15651565
avg_weight_mag_head = compute_average_weight_magnitude(self.head_dict)
15661566

1567-
e_rank_last_linear = cal_effective_rank(self.tokenizer.encoder[encoder_index], inputs, representation_layer_name="last_linear")
1567+
e_rank_last_linear = compute_effective_rank(self.tokenizer.encoder[encoder_index], inputs, representation_layer_name="last_linear")
15681568
try:
1569-
e_rank_sim_norm = cal_effective_rank(self.tokenizer.encoder[encoder_index], inputs, representation_layer_name="final_norm")
1569+
e_rank_sim_norm = compute_effective_rank(self.tokenizer.encoder[encoder_index], inputs, representation_layer_name="final_norm")
15701570
except Exception as e:
15711571
e_rank_sim_norm = torch.tensor(0.)
15721572

@@ -1658,7 +1658,7 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
16581658
# if self.analysis_dormant_ratio_weight_rank:
16591659
if self.do_analysis:
16601660
# Calculate dormant ratio of the world model
1661-
dormant_ratio_world_model = cal_dormant_ratio(self, {
1661+
dormant_ratio_world_model = calculate_dormant_ratio(self, {
16621662
'obs_embeddings_and_act_tokens': (obs_embeddings.detach(), act_tokens.detach())},
16631663
dormant_threshold=self.dormant_threshold)
16641664
dormant_ratio_transformer = dormant_ratio_world_model['transformer']

lzero/policy/muzero.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from lzero.mcts import MuZeroMCTSCtree as MCTSCtree
1616
from lzero.mcts import MuZeroMCTSPtree as MCTSPtree
1717
from lzero.model import ImageTransforms
18-
from lzero.model.utils import cal_dormant_ratio
18+
from lzero.model.utils import calculate_dormant_ratio
1919
from lzero.policy import scalar_transform, InverseScalarTransform, cross_entropy_loss, phi_transform, \
2020
DiscreteSupport, to_torch_float_tensor, mz_network_output_unpack, select_action, negative_cosine_similarity, \
2121
prepare_obs, configure_optimizers
@@ -113,7 +113,7 @@ class MuZeroPolicy(Policy):
113113
# This is done by setting the parameter learn.learner.hook.save_ckpt_after_iter to the same value as eval_freq in the train_muzero.py automatically.
114114
eval_offline=False,
115115
# (bool) Whether to calculate the dormant ratio.
116-
cal_dormant_ratio=False,
116+
calculate_dormant_ratio=False,
117117
# (bool) Whether to analyze simulation normalization.
118118
analysis_sim_norm=False,
119119
# (bool) Whether to analyze dormant ratio.
@@ -423,8 +423,8 @@ def _forward_learn(self, data: Tuple[torch.Tensor]) -> Dict[str, Union[float, in
423423

424424
# ========= logging for analysis =========
425425
# calculate dormant ratio of encoder
426-
if self._cfg.cal_dormant_ratio:
427-
self.dormant_ratio_encoder = cal_dormant_ratio(self._learn_model.representation_network, obs_batch.detach(),
426+
if self._cfg.calculate_dormant_ratio:
427+
self.dormant_ratio_encoder = calculate_dormant_ratio(self._learn_model.representation_network, obs_batch.detach(),
428428
percentage=self._cfg.dormant_threshold)
429429
# calculate L2 norm of latent state
430430
latent_state_l2_norms = torch.norm(latent_state.view(latent_state.shape[0], -1), p=2, dim=1).mean()
@@ -470,7 +470,7 @@ def _forward_learn(self, data: Tuple[torch.Tensor]) -> Dict[str, Union[float, in
470470
latent_state, reward, value, policy_logits = mz_network_output_unpack(network_output)
471471

472472
# ========= logging for analysis ===============
473-
if step_k == self._cfg.num_unroll_steps - 1 and self._cfg.cal_dormant_ratio:
473+
if step_k == self._cfg.num_unroll_steps - 1 and self._cfg.calculate_dormant_ratio:
474474
# calculate dormant ratio of encoder
475475
action_tmp = action_batch[:, step_k]
476476
if len(action_tmp.shape) == 1:
@@ -486,7 +486,7 @@ def _forward_learn(self, data: Tuple[torch.Tensor]) -> Dict[str, Union[float, in
486486
latent_state.shape[0], policy_logits.shape[-1], latent_state.shape[2], latent_state.shape[3]
487487
)
488488
state_action_encoding = torch.cat((latent_state, action_encoding), dim=1)
489-
self.dormant_ratio_dynamics = cal_dormant_ratio(self._learn_model.dynamics_network,
489+
self.dormant_ratio_dynamics = calculate_dormant_ratio(self._learn_model.dynamics_network,
490490
state_action_encoding.detach(),
491491
percentage=self._cfg.dormant_threshold)
492492
# ========= logging for analysis ===============

lzero/policy/scaling_transform.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Union
22
import torch
3-
3+
import numpy as np
44

55
class DiscreteSupport(object):
66

@@ -11,7 +11,6 @@ def __init__(self, start: float, stop: float, step: float = 1., device: Union[st
1111
assert self.size > 0, "DiscreteSupport size must be greater than 0"
1212
self.step = step
1313

14-
1514
def scalar_transform(x: torch.Tensor, epsilon: float = 0.001, delta: float = 1.) -> torch.Tensor:
1615
"""
1716
Overview:

lzero/policy/unizero_multitask.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -522,11 +522,12 @@ def _init_learn(self) -> None:
522522
self._cfg.augmentation,
523523
image_shape=(self._cfg.model.observation_shape[1], self._cfg.model.observation_shape[2])
524524
)
525-
self.value_support = DiscreteSupport(-self._cfg.model.support_scale, self._cfg.model.support_scale, delta=1)
526-
self.reward_support = DiscreteSupport(-self._cfg.model.support_scale, self._cfg.model.support_scale, delta=1)
527-
self.inverse_scalar_transform_handle = InverseScalarTransform(
528-
self._cfg.model.support_scale, self._cfg.device, self._cfg.model.categorical_distribution
529-
)
525+
526+
self.value_support = DiscreteSupport(*self._cfg.model.value_support_range, self._cfg.device)
527+
self.reward_support = DiscreteSupport(*self._cfg.model.reward_support_range, self._cfg.device)
528+
self.value_inverse_scalar_transform_handle = InverseScalarTransform(self.value_support, self._cfg.model.categorical_distribution)
529+
self.reward_inverse_scalar_transform_handle = InverseScalarTransform(self.reward_support, self._cfg.model.categorical_distribution)
530+
530531
self.intermediate_losses = defaultdict(float)
531532
self.l2_norm_before = 0.
532533
self.l2_norm_after = 0.

0 commit comments

Comments (0)