Skip to content

Commit 31d04df

Browse files
[TRTLLM-9108][feat] Add test configurable moe module multi gpu (NVIDIA#10699)
Signed-off-by: leslie-fang25 <leslief@nvidia.com>
1 parent ea928f6 commit 31d04df

5 files changed

Lines changed: 345 additions & 59 deletions

File tree

cpp/tensorrt_llm/nanobind/runtime/moeBindings.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@ void initMoeBindings(nb::module_& m)
9393
.def("get_pointer", &tr::SingleLayerMoeLoadBalancer::getSelfPtr,
9494
"Get the pointer of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>())
9595
.def("get_layer_id", &tr::SingleLayerMoeLoadBalancer::getLayerId,
96-
"Get the layer id of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>());
96+
"Get the layer id of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>())
97+
.def("get_old_rank_expert_ids", &tr::SingleLayerMoeLoadBalancer::getOldRankExpertIds,
98+
"Get the old rank expert ids of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>());
9799

98100
// Bind MoeLoadBalancer class
99101
nb::class_<tr::MoeLoadBalancer>(m, "MoeLoadBalancer")

cpp/tensorrt_llm/pybind/runtime/moeBindings.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ void initMoeBindings(pybind11::module_& m)
9696
.def("get_pointer", &tr::SingleLayerMoeLoadBalancer::getSelfPtr,
9797
"Get the pointer of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>())
9898
.def("get_layer_id", &tr::SingleLayerMoeLoadBalancer::getLayerId,
99-
"Get the layer id of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>());
99+
"Get the layer id of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>())
100+
.def("get_old_rank_expert_ids", &tr::SingleLayerMoeLoadBalancer::getOldRankExpertIds,
101+
"Get the old rank expert ids of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>());
100102

101103
// Bind MoeLoadBalancer class
102104
py::class_<tr::MoeLoadBalancer>(m, "MoeLoadBalancer")

cpp/tensorrt_llm/runtime/moeLoadBalancer/moeLoadBalancer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,12 @@ class SingleLayerMoeLoadBalancer
143143
return mLayerId;
144144
}
145145

146+
std::vector<std::vector<int>> getOldRankExpertIds() const
147+
{
148+
// This API is only used in test_moe_module to check expert placement.
149+
return mCpuPlacementInfo.oldRankExpertIds;
150+
}
151+
146152
tensorrt_llm::kernels::MoeLoadBalanceMetaInfo getMetaInfo() const
147153
{
148154
return mMetaInfo;

tensorrt_llm/_torch/modules/fused_moe/moe_load_balancer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,9 @@ def __init__(
449449
def get_layer_idx(self):
450450
return self.single_layer_load_balancer_impl.get_layer_id()
451451

452+
def get_old_rank_expert_ids(self):
453+
return self.single_layer_load_balancer_impl.get_old_rank_expert_ids()
454+
452455
def get_load_expert_ids(self):
453456
assert self.updates_enabled, "should not call get_load_expert_ids when using statistic routing"
454457
return self.load_expert_ids

0 commit comments

Comments (0)