Skip to content

Commit a24cace

Browse files
committed
fix(pu): fix some data type bugs
1 parent 549b0b1 commit a24cace

File tree

6 files changed

+230
-32
lines changed

6 files changed

+230
-32
lines changed

lzero/model/common.py

Lines changed: 121 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,6 @@ def decode(self, embeddings: torch.Tensor, max_length: int = 512) -> str:
470470
def forward(self, x: torch.Tensor, no_grad: bool = True) -> torch.Tensor:
471471
return self.encode(x, no_grad=no_grad)
472472

473-
474473
class HFLanguageRepresentationNetwork(nn.Module):
475474
def __init__(self,
476475
model_path: str = 'google-bert/bert-base-uncased',
@@ -489,32 +488,26 @@ def __init__(self,
489488
super().__init__()
490489
from transformers import AutoModel, AutoTokenizer
491490

492-
# [FIX] Load tokenizer for ALL ranks, not just non-zero ranks
493491
if tokenizer is not None:
494492
self.tokenizer = tokenizer
495493
else:
496-
# Load tokenizer with same distributed logic as model
497494
if get_rank() == 0:
498495
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
499496
if get_world_size() > 1:
500497
torch.distributed.barrier()
501498
if get_rank() != 0:
502499
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
503500

504-
# In distributed settings, ensure only rank 0 downloads the model/tokenizer.
505501
if get_rank() == 0:
506502
self.pretrained_model = AutoModel.from_pretrained(model_path)
507-
508503
if get_world_size() > 1:
509-
# Wait for rank 0 to finish loading the model.
510504
torch.distributed.barrier()
511505
if get_rank() != 0:
512506
self.pretrained_model = AutoModel.from_pretrained(model_path)
513507

514508
self.embedding_size = embedding_size
515509
self.embed_proj_head = nn.Linear(self.pretrained_model.config.hidden_size, self.embedding_size)
516510

517-
# # Select the normalization method based on the final_norm_option_in_encoder parameter.
518511
if final_norm_option_in_encoder.lower() == "simnorm":
519512
self.norm = SimNorm(simnorm_dim=group_size)
520513
elif final_norm_option_in_encoder.lower() == "layernorm":
@@ -533,26 +526,140 @@ def forward(self, x: torch.Tensor, no_grad: bool = True) -> torch.Tensor:
533526
Returns:
534527
- (:obj:`torch.Tensor`): The final language embedding of shape (B, embedding_size).
535528
"""
536-
529+
# Ensure the input tensor is of type long.
530+
x = x.long()
531+
537532
# Construct the attention mask to exclude padding tokens.
538-
attention_mask = x != self.tokenizer.pad_token_id
533+
attention_mask = (x != self.tokenizer.pad_token_id).long()
534+
535+
# ==================== Fix start ====================
536+
# 1. Explicitly create token_type_ids
537+
# For single-sentence inputs, token_type_ids is an all-zeros tensor with the same shape as input_ids.
538+
token_type_ids = torch.zeros_like(x, device=x.device)
539+
540+
# 2. Remove the dangerous internal-state modification
541+
# The code block below was the root cause of the error and must be deleted.
542+
# if hasattr(self.pretrained_model, 'embeddings') and hasattr(self.pretrained_model.embeddings, 'token_type_ids'):
543+
# self.pretrained_model.embeddings.token_type_ids = None
544+
# ==================== Fix end ====================
539545

540546
if no_grad:
541547
with torch.no_grad():
542-
x = x.long() # Ensure the input tensor is of type long.
543-
outputs = self.pretrained_model(x, attention_mask=attention_mask)
544-
# Get the hidden state from the last layer and select the output corresponding to the [CLS] token.
548+
# 3. Pass token_type_ids explicitly when calling the model
549+
outputs = self.pretrained_model(x, attention_mask=attention_mask, token_type_ids=token_type_ids)
545550
cls_embedding = outputs.last_hidden_state[:, 0, :]
546551
else:
547-
x = x.long()
548-
outputs = self.pretrained_model(x, attention_mask=attention_mask)
552+
# 3. Pass token_type_ids explicitly when calling the model
553+
outputs = self.pretrained_model(x, attention_mask=attention_mask, token_type_ids=token_type_ids)
549554
cls_embedding = outputs.last_hidden_state[:, 0, :]
550555

551556
cls_embedding = self.embed_proj_head(cls_embedding)
552557
cls_embedding = self.norm(cls_embedding)
553558

554559
return cls_embedding
555560

561+
# class HFLanguageRepresentationNetwork(nn.Module):
562+
# def __init__(self,
563+
# model_path: str = 'google-bert/bert-base-uncased',
564+
# embedding_size: int = 768,
565+
# group_size: int = 8,
566+
# final_norm_option_in_encoder: str = "layernorm",
567+
# tokenizer=None):
568+
# """
569+
# Arguments:
570+
# - model_path (str): The path to the pretrained Hugging Face model. Default is 'google-bert/bert-base-uncased'.
571+
# - embedding_size (int): The dimension of the output embeddings. Default is 768.
572+
# - group_size (int): The group size for SimNorm when using normalization.
573+
# - final_norm_option_in_encoder (str): The type of normalization to use ("simnorm" or "layernorm"). Default is "layernorm".
574+
# - tokenizer (Optional): An instance of a tokenizer. If None, the tokenizer will be loaded from the pretrained model.
575+
# """
576+
# super().__init__()
577+
# from transformers import AutoModel, AutoTokenizer
578+
579+
# # [FIX] Load tokenizer for ALL ranks, not just non-zero ranks
580+
# if tokenizer is not None:
581+
# self.tokenizer = tokenizer
582+
# else:
583+
# # Load tokenizer with same distributed logic as model
584+
# if get_rank() == 0:
585+
# self.tokenizer = AutoTokenizer.from_pretrained(model_path)
586+
# if get_world_size() > 1:
587+
# torch.distributed.barrier()
588+
# if get_rank() != 0:
589+
# self.tokenizer = AutoTokenizer.from_pretrained(model_path)
590+
591+
# # In distributed settings, ensure only rank 0 downloads the model/tokenizer.
592+
# if get_rank() == 0:
593+
# self.pretrained_model = AutoModel.from_pretrained(model_path)
594+
595+
# if get_world_size() > 1:
596+
# # Wait for rank 0 to finish loading the model.
597+
# torch.distributed.barrier()
598+
# if get_rank() != 0:
599+
# self.pretrained_model = AutoModel.from_pretrained(model_path)
600+
601+
# self.embedding_size = embedding_size
602+
# self.embed_proj_head = nn.Linear(self.pretrained_model.config.hidden_size, self.embedding_size)
603+
604+
# # # Select the normalization method based on the final_norm_option_in_encoder parameter.
605+
# if final_norm_option_in_encoder.lower() == "simnorm":
606+
# self.norm = SimNorm(simnorm_dim=group_size)
607+
# elif final_norm_option_in_encoder.lower() == "layernorm":
608+
# self.norm = nn.LayerNorm(embedding_size)
609+
# else:
610+
# raise NotImplementedError(f"Normalization type '{final_norm_option_in_encoder}' is not implemented. "
611+
# f"Choose 'simnorm' or 'layernorm'.")
612+
613+
# def forward(self, x: torch.Tensor, no_grad: bool = True) -> torch.Tensor:
614+
# """
615+
# Overview:
616+
# Computes language representation from input token IDs.
617+
# Arguments:
618+
# - x (:obj:`torch.Tensor`): Input token sequence of shape (B, seq_len).
619+
# - no_grad (:obj:`bool`): If True, run the transformer model in `torch.no_grad()` context.
620+
# Returns:
621+
# - (:obj:`torch.Tensor`): The final language embedding of shape (B, embedding_size).
622+
# """
623+
624+
# # Construct the attention mask to exclude padding tokens.
625+
# attention_mask = x != self.tokenizer.pad_token_id
626+
627+
# # [FIX] Clear buffered token_type_ids to prevent shape mismatch errors
628+
# # BERT models cache token_type_ids for efficiency, but this causes issues
629+
# # when batch sizes or sequence lengths vary across different forward passes.
630+
# # We delete the buffer entirely and let BERT recreate it with the correct shape.
631+
# if hasattr(self.pretrained_model, 'embeddings') and hasattr(self.pretrained_model.embeddings, 'token_type_ids'):
632+
# # Check if token_type_ids exists and has wrong shape
633+
# if self.pretrained_model.embeddings.token_type_ids is not None:
634+
# expected_seq_len = x.shape[1]
635+
# current_seq_len = self.pretrained_model.embeddings.token_type_ids.shape[1]
636+
# # Only delete if the cached buffer has wrong shape
637+
# if current_seq_len != expected_seq_len:
638+
# # Delete the registered buffer and let BERT recreate it
639+
# delattr(self.pretrained_model.embeddings, 'token_type_ids')
640+
# # Re-register with correct shape
641+
# self.pretrained_model.embeddings.register_buffer(
642+
# "token_type_ids",
643+
# torch.zeros((1, expected_seq_len), dtype=torch.long, device=x.device),
644+
# persistent=False
645+
# )
646+
647+
# if no_grad:
648+
# with torch.no_grad():
649+
# x = x.long() # Ensure the input tensor is of type long.
650+
# outputs = self.pretrained_model(x, attention_mask=attention_mask)
651+
# # Get the hidden state from the last layer and select the output corresponding to the [CLS] token.
652+
# cls_embedding = outputs.last_hidden_state[:, 0, :]
653+
# else:
654+
# x = x.long()
655+
# outputs = self.pretrained_model(x, attention_mask=attention_mask)
656+
# cls_embedding = outputs.last_hidden_state[:, 0, :]
657+
658+
# cls_embedding = self.embed_proj_head(cls_embedding)
659+
# cls_embedding = self.norm(cls_embedding)
660+
661+
# return cls_embedding
662+
556663

557664
class RepresentationNetworkUniZero(nn.Module):
558665

lzero/model/unizero_world_models/tokenizer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ def encode_to_obs_embeddings(self, x: torch.Tensor, task_id: int = 0) -> torch.T
146146
x = x.contiguous().view(-1, original_shape[-1]) # Shape: (B*T, E)
147147
# Note: 2D (B, E) and 4D (B, C, H, W) inputs are processed directly without reshaping.
148148

149+
# [DEBUG] Log shape before encoder
150+
import logging
151+
logger = logging.getLogger(__name__)
152+
logger.info(f"[TOKENIZER_DEBUG] Before encoder: original_shape={original_shape}, x.shape={x.shape}, encoder_type={type(encoder_module).__name__}")
153+
149154
# Step 3: Pass the processed tensor through the encoder.
150155
obs_embeddings = encoder_module(x)
151156
if len(obs_embeddings.shape) != 2:

lzero/model/unizero_world_models/world_model.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -705,14 +705,16 @@ def forward(
705705
f"Returning dummy outputs with correct shapes."
706706
)
707707
# Return outputs with shape [batch, 1, ...] to allow squeeze(1) to work
708+
# Important: logits_value and logits_rewards need support_size dimension
708709
batch_size = obs_embeddings.shape[0]
710+
support_size = self.config.support_size
709711
return WorldModelOutput(
710712
torch.zeros(batch_size, 1, self.config.embed_dim, device=self.device),
711713
torch.zeros(batch_size, 1, self.num_observations_tokens, device=self.device),
712-
torch.zeros(batch_size, 1, device=self.device),
713-
None,
714-
torch.zeros(batch_size, 1, self.config.action_space_size, device=self.device),
715-
torch.zeros(batch_size, 1, device=self.device),
714+
torch.zeros(batch_size, 1, support_size, device=self.device), # logits_rewards
715+
None, # logits_ends
716+
torch.zeros(batch_size, 1, self.config.action_space_size, device=self.device), # logits_policy
717+
torch.zeros(batch_size, 1, support_size, device=self.device), # logits_value
716718
)
717719

718720
if not self.config.rotary_emb:
@@ -1650,11 +1652,19 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
16501652
if self.analysis_dormant_ratio_weight_rank:
16511653
# --- Dormant Ratio Calculation ---
16521654
# Calculate the dormant ratio of the encoder to monitor neuron activity.
1653-
shape = batch['observations'].shape # Original shape, e.g., (B, T, C, H, W)
1655+
shape = batch['observations'].shape # Original shape, e.g., (B, T, C, H, W) for images or (B, T, E) for text
16541656
# Reshape observations to create a single large batch for the encoder.
1655-
# E.g., (32, 5, 3, 64, 64) -> (160, 3, 64, 64)
1656-
inputs = batch['observations'].contiguous().view(-1, *shape[-3:])
1657-
1657+
1658+
# [FIX] Handle both image and text observations
1659+
if len(shape) == 5: # Image: (B, T, C, H, W)
1660+
# E.g., (32, 5, 3, 64, 64) -> (160, 3, 64, 64)
1661+
inputs = batch['observations'].contiguous().view(-1, *shape[-3:])
1662+
elif len(shape) == 3: # Text: (B, T, E)
1663+
# E.g., (2, 11, 512) -> (22, 512)
1664+
inputs = batch['observations'].contiguous().view(-1, shape[-1])
1665+
else: # Fall back to original behavior for 2D or 4D
1666+
inputs = batch['observations'].contiguous().view(-1, *shape[-3:])
1667+
16581668
dormant_ratio_encoder_dict = calculate_dormant_ratio(
16591669
self.tokenizer.encoder, inputs.detach(), dormant_threshold=self.dormant_threshold
16601670
)
@@ -1732,7 +1742,10 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
17321742
step_counter=global_step
17331743
)
17341744

1735-
if self.config.use_priority:
1745+
# [FIX] Add default value for use_priority if not present in config
1746+
use_priority = getattr(self.config, 'use_priority', False)
1747+
1748+
if use_priority:
17361749
# ==================== START MODIFICATION 5 ====================
17371750
# Calculate value_priority, similar to MuZero.
17381751
with torch.no_grad():

lzero/policy/scaling_transform.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,20 @@ def __init__(
8080

8181
def __call__(self, logits: torch.Tensor, epsilon: float = 0.001) -> torch.Tensor:
8282
if self.categorical_distribution:
83+
# [FIX] Handle edge case where logits might be 1D (batch_size=1 and squeezed)
84+
# Ensure logits is at least 2D for softmax operation
85+
if logits.dim() == 1:
86+
logits = logits.unsqueeze(0) # [support_size] -> [1, support_size]
87+
was_1d = True
88+
else:
89+
was_1d = False
90+
8391
value_probs = torch.softmax(logits, dim=1)
8492
value = value_probs.mul_(self.value_support).sum(1, keepdim=True)
93+
94+
# If input was 1D, squeeze back to maintain shape consistency
95+
if was_1d:
96+
value = value.squeeze(0) # [1, 1] -> [1]
8597
else:
8698
value = logits
8799
tmp = ((torch.sqrt(1 + 4 * epsilon * (torch.abs(value) + 1 + epsilon)) - 1) / (2 * epsilon))

zoo/jericho/priorzero/priorzero_config.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ def get_priorzero_config(
210210
# Analysis flags
211211
analysis_sim_norm=False,
212212
analysis_dormant_ratio_weight_rank=False,
213-
213+
# use_priority=False,
214+
use_priority=True,
215+
214216
# Position encoding
215217
rotary_emb=False, # Whether to use RoPE
216218
rope_theta=10000,
@@ -515,7 +517,7 @@ def get_priorzero_config_for_quick_test(env_id: str = 'zork1.z5', seed: int = 0,
515517

516518
main_config.policy.num_simulations = 2
517519
main_config.policy.batch_size = 2
518-
main_config.policy.game_segment_length = 50
520+
main_config.policy.game_segment_length = 20
519521
main_config.policy.num_segments = 2
520522
main_config.policy.replay_buffer_size = 1000
521523

@@ -525,11 +527,13 @@ def get_priorzero_config_for_quick_test(env_id: str = 'zork1.z5', seed: int = 0,
525527
main_config.env.collector_env_num,
526528
main_config.env.evaluator_env_num
527529
)
528-
main_config.policy.model.world_model_cfg.num_heads = 4
529-
main_config.policy.model.world_model_cfg.context_length = 3
530-
main_config.policy.model.world_model_cfg.num_unroll_steps = 5
531-
main_config.policy.model.world_model_cfg.max_blocks = 5
532-
main_config.policy.model.world_model_cfg.max_blocks = 10
530+
main_config.policy.model.world_model_cfg.num_heads = 2
531+
# [FIX] Set infer_context_length to match reduced num_unroll_steps
532+
main_config.policy.model.world_model_cfg.infer_context_length = 2 # Reduced from 4
533+
main_config.policy.model.world_model_cfg.context_length = 4 # 2 * infer_context_length
534+
main_config.policy.model.world_model_cfg.num_unroll_steps = 3
535+
main_config.policy.model.world_model_cfg.max_blocks = 3
536+
main_config.policy.model.world_model_cfg.max_tokens = 6 # 2 * max_blocks
533537

534538
main_config.policy.llm_policy_cfg.prompt_max_len = 1024
535539
main_config.policy.llm_policy_cfg.generate_max_len = 128

0 commit comments

Comments
 (0)