We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c817d08, commit 6f84ea6 (Copy full SHA for 6f84ea6)
primus/configs/models/megatron/grok_base.yaml
@@ -12,6 +12,6 @@ num_query_groups: 8
12
num_experts: 8
13
moe_router_topk: 2
14
moe_router_load_balancing_type: none
15
-moe_aux_loss_coeff: 1e-2
+moe_aux_loss_coeff: 1.0e-2
16
moe_grouped_gemm: true
17
moe_token_dispatcher_type: alltoall
primus/configs/models/megatron/mixtral_base.yaml
moe_router_load_balancing_type: aux_loss
0 commit comments