Skip to content

Commit 31d04df

Browse files
[TRTLLM-9108][feat] Add test configurable moe module multi gpu (NVIDIA#10699)
Signed-off-by: leslie-fang25 <leslief@nvidia.com>
1 parent ea928f6 commit 31d04df

5 files changed

Lines changed: 345 additions & 59 deletions

File tree

cpp/tensorrt_llm/nanobind/runtime/moeBindings.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@ void initMoeBindings(nb::module_& m)
9393
.def("get_pointer", &tr::SingleLayerMoeLoadBalancer::getSelfPtr,
9494
"Get the pointer of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>())
9595
.def("get_layer_id", &tr::SingleLayerMoeLoadBalancer::getLayerId,
96-
"Get the layer id of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>());
96+
"Get the layer id of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>())
97+
.def("get_old_rank_expert_ids", &tr::SingleLayerMoeLoadBalancer::getOldRankExpertIds,
98+
"Get the old rank expert ids of the SingleLayerMoeLoadBalancer", nb::call_guard<nb::gil_scoped_release>());
9799

98100
// Bind MoeLoadBalancer class
99101
nb::class_<tr::MoeLoadBalancer>(m, "MoeLoadBalancer")

cpp/tensorrt_llm/pybind/runtime/moeBindings.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ void initMoeBindings(pybind11::module_& m)
9696
.def("get_pointer", &tr::SingleLayerMoeLoadBalancer::getSelfPtr,
9797
"Get the pointer of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>())
9898
.def("get_layer_id", &tr::SingleLayerMoeLoadBalancer::getLayerId,
99-
"Get the layer id of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>());
99+
"Get the layer id of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>())
100+
.def("get_old_rank_expert_ids", &tr::SingleLayerMoeLoadBalancer::getOldRankExpertIds,
101+
"Get the old rank expert ids of the SingleLayerMoeLoadBalancer", py::call_guard<py::gil_scoped_release>());
100102

101103
// Bind MoeLoadBalancer class
102104
py::class_<tr::MoeLoadBalancer>(m, "MoeLoadBalancer")

cpp/tensorrt_llm/runtime/moeLoadBalancer/moeLoadBalancer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,12 @@ class SingleLayerMoeLoadBalancer
143143
return mLayerId;
144144
}
145145

146+
std::vector<std::vector<int>> getOldRankExpertIds() const
147+
{
148+
// This API is only used in test_moe_module to check expert placement.
149+
return mCpuPlacementInfo.oldRankExpertIds;
150+
}
151+
146152
tensorrt_llm::kernels::MoeLoadBalanceMetaInfo getMetaInfo() const
147153
{
148154
return mMetaInfo;

tensorrt_llm/_torch/modules/fused_moe/moe_load_balancer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,9 @@ def __init__(
449449
def get_layer_idx(self):
450450
return self.single_layer_load_balancer_impl.get_layer_id()
451451

452+
def get_old_rank_expert_ids(self):
453+
return self.single_layer_load_balancer_impl.get_old_rank_expert_ids()
454+
452455
def get_load_expert_ids(self):
453456
assert self.updates_enabled, "should not call get_load_expert_ids when using statistic routing"
454457
return self.load_expert_ids

0 commit comments

Comments (0)