We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent d26c745 · commit 6131bd3 · Copy full SHA for 6131bd3
1 file changed
flashinfer/fused_moe/core.py
@@ -2129,9 +2129,17 @@ def trtllm_mxint4_block_scale_moe_op(
2129
use_shuffled_weight=True,
2130
)
2131
tunning_config = MoERunner.tuning_config_no_hidden_states_scales
2132
+ # Create placeholder for tuning when routing_logits is None (routed mode)
2133
+ routing_logits_for_tuning = (
2134
+ routing_logits
2135
+ if routing_logits is not None
2136
+ else torch.empty(
2137
+ num_tokens, num_experts, dtype=routing_dtype, device="meta"
2138
+ )
2139
+ )
2140
inputs = [
2141
output,
- routing_logits,
2142
+ routing_logits_for_tuning,
2143
topk_ids,
2144
expert_weights,
2145
hidden_states,
0 commit comments