2 files changed: +8 −8 lines changed, both under vllm_rbln/model_executor/layers/quantization.

First file:

@@ -811,6 +811,9 @@ def process_weights_after_loading(self, layer):
             max_w13_scales, requires_grad=False
         )
 
+        if getattr(layer, "_expert_map", None) is not None:
+            layer._expert_map_list = layer._expert_map.data.to(dtype=torch.int32).tolist()
+
     def select_gemm_impl(
         self,
         prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,

@@ -863,10 +866,7 @@ def apply(
 
         expert_map_const = None
        if layer.expert_map is not None:
-            expert_map_const = layer.expert_map
-            if expert_map_const.dtype != torch.int32:
-                expert_map_const = expert_map_const.to(dtype=torch.int32)
-            expert_map_const = expert_map_const.detach().clone()
+            expert_map_const = torch.tensor(layer._expert_map_list, dtype=torch.int32)
 
         tokens_mask = None
         use_moe_tokens_mask = envs.VLLM_RBLN_USE_MOE_TOKENS_MASK
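The change in both files follows the same pattern: snapshot the expert map once, as a plain Python list of ints, when weights are finalized, then rebuild it as a fresh int32 constant tensor on every apply() call instead of detaching and cloning the live tensor. One plausible motivation (not stated in the diff) is that a tensor built from a Python list is a true constant for graph capture, rather than an alias of a live buffer. Below is a minimal, self-contained sketch of the pattern; FakeLayer and the free functions are hypothetical stand-ins, and only _expert_map, _expert_map_list, and expert_map mirror identifiers visible in the diff.

# Hedged sketch of the caching pattern, assuming a layer object that carries
# an expert-map tensor. FakeLayer is hypothetical.
import torch

class FakeLayer:
    def __init__(self, expert_map: torch.Tensor):
        # Live mapping tensor (in vLLM, expert_map typically maps global
        # expert ids to local ids, with -1 for experts not on this rank).
        self._expert_map = expert_map
        self.expert_map = expert_map

def process_weights_after_loading(layer: FakeLayer) -> None:
    # Snapshot once as a plain Python list, normalized to int32, so later
    # calls never need to touch the live tensor.
    if getattr(layer, "_expert_map", None) is not None:
        layer._expert_map_list = layer._expert_map.data.to(dtype=torch.int32).tolist()

def apply(layer: FakeLayer):
    expert_map_const = None
    if layer.expert_map is not None:
        # Materialize the cached list as a fresh int32 constant tensor.
        expert_map_const = torch.tensor(layer._expert_map_list, dtype=torch.int32)
    return expert_map_const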
Second file:

@@ -375,6 +375,9 @@ def process_weights_after_loading(self, layer):
         layer.register_buffer("down_proj_scales", layer.w2_weight_scale.data)
         layer.register_buffer("down_proj_bias", layer.w2_bias.data)
 
+        if getattr(layer, "_expert_map", None) is not None:
+            layer._expert_map_list = layer._expert_map.data.to(dtype=torch.int32).tolist()
+
     def select_gemm_impl(
         self,
         prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,

@@ -407,10 +410,7 @@ def apply(
         if layer.activation == MoEActivation.SWIGLUOAI:
             expert_map_const = None
             if layer.expert_map is not None:
-                assert getattr(layer, "expert_map_const", None) is not None
-                expert_map_const = torch.tensor(
-                    layer.expert_map_const, dtype=torch.int32
-                )
+                expert_map_const = torch.tensor(layer._expert_map_list, dtype=torch.int32)
 
         tokens_mask = None
         use_moe_tokens_mask = envs.VLLM_RBLN_USE_MOE_TOKENS_MASK
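A quick round-trip check of the sketch above (values are made up):

layer = FakeLayer(torch.tensor([0, 1, -1, 2], dtype=torch.int64))
process_weights_after_loading(layer)
const = apply(layer)
assert const.dtype == torch.int32
assert const.tolist() == [0, 1, -1, 2]

Note the cleanup this enables in the second file: the old code asserted that a separately maintained expert_map_const list existed before converting it, whereas the new code derives _expert_map_list directly in process_weights_after_loading whenever _expert_map is set, so the assert is no longer needed.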