We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e4f7303, commit 9f3a99d (Copy full SHA for 9f3a99d)
1 file changed
flashinfer/fused_moe/core.py
@@ -2154,9 +2154,17 @@ def trtllm_mxint4_block_scale_moe_op(
2154
use_shuffled_weight=True,
2155
)
2156
tunning_config = MoERunner.tuning_config_no_hidden_states_scales
2157
+ # Create placeholder for tuning when routing_logits is None (routed mode)
2158
+ routing_logits_for_tuning = (
2159
+ routing_logits
2160
+ if routing_logits is not None
2161
+ else torch.empty(
2162
+ num_tokens, num_experts, dtype=routing_dtype, device="meta"
2163
+ )
2164
+ )
2165
inputs = [
2166
output,
- routing_logits,
2167
+ routing_logits_for_tuning,
2168
topk_ids,
2169
expert_weights,
2170
hidden_states,
0 commit comments