Skip to content

Commit dafc67d

Browse files
committed
fix merge
1 parent e7a98a9 commit dafc67d

1 file changed

Lines changed: 4 additions & 2 deletions

File tree

csrc/trtllm_fused_moe_kernel_launcher.cu

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -971,6 +971,7 @@ class Fp8BlockScaleLauncher : public FusedMoeLauncher {
971971
bool use_precomputed = expert_indices.ndim() == 2 && expert_indices.size(0) > 0;
972972
// When using pre-computed routing, pass nullptr as routing_logits to tell the
973973
// routing runner to use the pre-computed expert indices from workspace.routing_expert_indexes
974+
// FP8 only supports Mode 1 (FromLogits) and Mode 2 (PackedPrecomputed), so expertIds is nullptr
974975
routing_runner.run(
975976
use_precomputed ? nullptr : args->routing_logits, args->routing_bias, args->num_tokens,
976977
args->num_experts, args->top_k, args->n_group, args->topk_group, args->local_expert_offset,
@@ -979,8 +980,9 @@ class Fp8BlockScaleLauncher : public FusedMoeLauncher {
979980
static_cast<int*>(total_num_padded_tokens.data_ptr()),
980981
static_cast<int*>(expanded_idx_to_permuted_idx.data_ptr()),
981982
nullptr /*permuted_idx_to_expanded_idx.data_ptr()*/,
982-
static_cast<int*>(permuted_idx_to_token_idx.data_ptr()), workspace.expert_weights,
983-
static_cast<int*>(num_tokens_per_expert.data_ptr()),
983+
static_cast<int*>(permuted_idx_to_token_idx.data_ptr()),
984+
nullptr, // expertIds - FP8 doesn't support UnpackedPrecomputed mode
985+
workspace.expert_weights, static_cast<int*>(num_tokens_per_expert.data_ptr()),
984986
static_cast<int*>(cta_idx_xy_to_batch_idx.data_ptr()),
985987
static_cast<int*>(cta_idx_xy_to_mn_limit.data_ptr()),
986988
static_cast<int*>(num_non_exiting_ctas.data_ptr()), args->mDtypeElt, mRoutingBiasDtype,

0 commit comments

Comments
 (0)