Skip to content

Commit f4c90b6

Browse files
committed
test
1 parent ef170fd commit f4c90b6

File tree

11 files changed

+761
-34
lines changed

11 files changed

+761
-34
lines changed

lzero/Tool.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import torch
2+
import numpy as np
3+
from typing import List, Tuple
4+
5+
def compute_gradient_conflicts(gradients: List[torch.Tensor]) -> dict:
    """
    Compute pairwise conflict metrics between multiple gradients.

    Args:
        gradients: List of gradient tensors; all must share the same shape.

    Returns:
        Dict with:
            - 'cosine_similarity_matrix': (n, n) pairwise cosine similarities.
            - 'avg_conflict_score' / 'max_conflict_score': mean / max of the
              negated off-diagonal cosine similarities (0.0 if fewer than
              two gradients).
            - 'dot_product_matrix': (n, n) pairwise dot products.
            - 'gradient_norms': list of L2 norms, one per gradient.
            - 'num_conflicting_pairs': number of unordered pairs whose dot
              product is negative (i.e. the gradients point in opposing
              directions).
            - 'avg_conflict_intensity': mean magnitude of those negative dot
              products (0.0 when there are none).

    Raises:
        ValueError: If ``gradients`` is empty or the shapes differ.
    """
    # Explicit exceptions instead of `assert`: asserts are stripped
    # under `python -O`, silently disabling input validation.
    if not gradients:
        raise ValueError("gradients must be a non-empty list")
    if any(g.shape != gradients[0].shape for g in gradients):
        raise ValueError("all gradients must have the same shape")

    n_gradients = len(gradients)
    results = {}

    # Stack flattened gradients once so all pairwise metrics come from
    # vectorized matmuls instead of O(n^2) Python loops.
    flat = torch.stack([g.flatten() for g in gradients])  # (n, d)

    # 1. Pairwise cosine-similarity matrix (normalize rows, then Gram matrix).
    normed = torch.nn.functional.normalize(flat, dim=1)
    cosine_sim_matrix = normed @ normed.t()
    results['cosine_similarity_matrix'] = cosine_sim_matrix

    # 2. Conflict scores: negated cosine similarity, diagonal excluded.
    mask = ~torch.eye(n_gradients, dtype=torch.bool)
    conflict_scores = -cosine_sim_matrix[mask]
    if conflict_scores.numel() > 0:
        results['avg_conflict_score'] = conflict_scores.mean().item()
        results['max_conflict_score'] = conflict_scores.max().item()
    else:
        # A single gradient has no pairs, hence no conflict; the original
        # `.max()` on an empty tensor would raise here.
        results['avg_conflict_score'] = 0.0
        results['max_conflict_score'] = 0.0

    # 3. Pairwise dot-product matrix.
    dot_product_matrix = flat @ flat.t()
    results['dot_product_matrix'] = dot_product_matrix

    # 4. L2 norm of each gradient.
    results['gradient_norms'] = [torch.norm(g).item() for g in gradients]

    # 5. Conflict intensity over unordered pairs (upper triangle only):
    # a negative dot product marks a conflicting pair.
    negative_dot_products = [
        -dot_product_matrix[i, j].item()
        for i in range(n_gradients)
        for j in range(i + 1, n_gradients)
        if dot_product_matrix[i, j] < 0
    ]
    results['num_conflicting_pairs'] = len(negative_dot_products)
    results['avg_conflict_intensity'] = (
        float(np.mean(negative_dot_products)) if negative_dot_products else 0.0
    )

    return results
66+
67+
# Usage example
def example_usage():
    """Demonstrate gradient-conflict analysis on three random gradients."""
    # Fixed seed so the demo output is reproducible.
    torch.manual_seed(42)
    demo_grads = [torch.randn(100) for _ in range(3)]

    analysis = compute_gradient_conflicts(demo_grads)

    print("梯度冲突分析结果:")
    for line in (
        f"平均冲突得分: {analysis['avg_conflict_score']:.4f}",
        f"最大冲突得分: {analysis['max_conflict_score']:.4f}",
        f"冲突梯度对数量: {analysis['num_conflicting_pairs']}",
        f"平均冲突强度: {analysis['avg_conflict_intensity']:.4f}",
        f"梯度范数: {analysis['gradient_norms']}",
    ):
        print(line)
    print("\n余弦相似度矩阵:")
    print(analysis['cosine_similarity_matrix'])
88+
89+
90+
# Script entry point: run the demo analysis when executed directly.
if __name__ == "__main__":
    example_usage()

lzero/entry/train_unizero_multitask_segment_ddp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ def train_unizero_multitask_segment_ddp(
521521
# 编译配置
522522
cfg = compile_config(cfg, seed=seed, env=None, auto=True, create_cfg=create_cfg, save_cfg=True)
523523
# 创建共享的policy
524-
policy = create_policy(cfg.policy, model=model, enable_field=['learn', 'collect', 'eval'])
524+
policy = create_policy(cfg.policy, model=model, enable_field=['learn', 'collect', 'eval']) # MOE
525525

526526
# 加载预训练模型(如果提供)
527527
if model_path is not None:

lzero/model/common.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,68 @@ def remove_hooks(self):
248248
self.forward_handler.remove()
249249
self.backward_handler.remove()
250250

251+
# # modified by tangjia
252+
# class ModelGradientHook:
253+
254+
255+
# def __init__(self):
256+
# """
257+
# Overview:
258+
# Class to capture gradients at model output.
259+
# """
260+
# self.output_grads = []
261+
262+
# def setup_hook(self, model):
263+
# # Hook to capture gradients at model output
264+
# self.backward_handler = model.register_full_backward_hook(self.backward_hook)
265+
266+
# def backward_hook(self, module, grad_input, grad_output):
267+
# with torch.no_grad():
268+
# # 保存输出梯度
269+
# if grad_output[0] is not None:
270+
# self.output_grads.append(grad_output[0].clone())
271+
272+
# def analyze(self):
273+
# if not self.output_grads:
274+
# return None
275+
276+
# # Calculate norms of output gradients
277+
# grad_norms = [torch.norm(g, p=2, dim=1).mean() for g in self.output_grads]
278+
# avg_grad_norm = torch.mean(torch.stack(grad_norms))
279+
# max_grad_norm = torch.max(torch.stack(grad_norms))
280+
# min_grad_norm = torch.min(torch.stack(grad_norms))
281+
282+
# # Clear stored data and delete tensors to free memory
283+
# self.clear_data()
284+
285+
# # Optionally clear CUDA cache
286+
# if torch.cuda.is_available():
287+
# torch.cuda.empty_cache()
288+
289+
# return avg_grad_norm, max_grad_norm, min_grad_norm
290+
291+
# def clear_data(self):
292+
# del self.output_grads[:]
293+
294+
# def remove_hooks(self):
295+
# self.backward_handler.remove()
296+
297+
# 使用示例
298+
# monitor = ModelGradientMonitor()
299+
# monitor.setup_hook(model)
300+
#
301+
# # 训练过程中...
302+
# loss.backward()
303+
#
304+
# # 获取梯度信息
305+
# grad_norm = monitor.get_gradient_norm()
306+
# grad_stats = monitor.get_gradient_stats()
307+
#
308+
# # 清理数据
309+
# monitor.clear_data()
310+
#
311+
# # 训练结束后移除hook
312+
# monitor.remove_hook()
251313

252314
class DownSample(nn.Module):
253315

lzero/model/unizero_model_multitask.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from easydict import EasyDict
77

88
from .common import MZNetworkOutput, RepresentationNetworkUniZero, RepresentationNetworkMLP, LatentDecoder, \
9-
VectorDecoderForMemoryEnv, LatentEncoderForMemoryEnv, LatentDecoderForMemoryEnv, FeatureAndGradientHook
9+
VectorDecoderForMemoryEnv, LatentEncoderForMemoryEnv, LatentDecoderForMemoryEnv, FeatureAndGradientHook #,ModelGradientHook
1010
from .unizero_world_models.tokenizer import Tokenizer
1111
from .unizero_world_models.world_model_multitask import WorldModelMT
1212

@@ -189,6 +189,11 @@ def __init__(
189189
self.encoder_hook = FeatureAndGradientHook()
190190
self.encoder_hook.setup_hooks(self.representation_network)
191191

192+
# if True: # Fixme: for debug
193+
# # 增加对encoder的hook,监控传播到encoder 上的梯度
194+
# self.encoder_output_hook = ModelGradientHook()
195+
# self.encoder_output_hook.setup_hook(self.representation_network)
196+
192197
self.tokenizer = Tokenizer(encoder=self.representation_network, decoder_network=None, with_lpips=False, obs_type=world_model_cfg.obs_type)
193198
self.world_model = WorldModelMT(config=world_model_cfg, tokenizer=self.tokenizer)
194199
print(f'{sum(p.numel() for p in self.world_model.parameters())} parameters in agent.world_model')
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
from .transformer import Transformer, TransformerConfig

0 commit comments

Comments
 (0)