Commit 3b77c7c

clairesonglee authored and wenxie-amd committed
disable cross entropy flags to avoid conv loss divergence
1 parent 8533f8c · commit 3b77c7c

30 files changed: +60 -60 lines changed
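
Every file receives the same two-line edit: the two cross-entropy fusion flags are commented out, so training falls back to the default unfused cross-entropy path. A minimal sketch of the block as it reads after this commit, assuming the Megatron-LM semantics of these keys (cross_entropy_loss_fusion enables the fused cross-entropy loss kernel, and cross_entropy_fusion_impl selects its implementation, "te" meaning Transformer Engine):

  # Cross entropy flags
  # cross_entropy_fusion_impl: "te"   # implementation of the fused kernel; "te" = Transformer Engine
  # cross_entropy_loss_fusion: true   # enables the fused cross-entropy loss kernel

With both lines commented out, the upstream defaults apply, which leave loss fusion disabled.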

examples/megatron/configs/MI300X/deepseek_v2_lite-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -99,5 +99,5 @@ modules:
   turbo_sync_free_moe_stage: 0

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/deepseek_v3-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -87,5 +87,5 @@ modules:
   use_turbo_grouped_mlp: true

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/grok1-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -61,8 +61,8 @@ modules:
   moe_permute_fusion: false
   gradient_accumulation_fusion: false
   moe_use_legacy_grouped_gemm: true
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

   # ckpt
   finetune: false

examples/megatron/configs/MI300X/grok2-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -61,8 +61,8 @@ modules:
   moe_permute_fusion: false
   gradient_accumulation_fusion: false
   moe_use_legacy_grouped_gemm: true
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

   # ckpt
   finetune: false

examples/megatron/configs/MI300X/llama2_70B-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -77,5 +77,5 @@ modules:
   use_turbo_grouped_mlp: true

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/llama2_7B-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -80,5 +80,5 @@ modules:
   # sequence_parallel: 1

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/llama3.1_70B-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -76,5 +76,5 @@ modules:
   use_turbo_attention: true

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/llama3.1_8B-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -72,5 +72,5 @@ modules:
   use_turbo_grouped_mlp: true

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/llama3.3_70B-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -77,5 +77,5 @@ modules:
   use_turbo_grouped_mlp: true

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true

examples/megatron/configs/MI300X/llama3_70B-pretrain.yaml

Lines changed: 2 additions & 2 deletions
@@ -77,5 +77,5 @@ modules:
   use_turbo_grouped_mlp: true

   # Cross entropy flags
-  cross_entropy_fusion_impl: "te"
-  cross_entropy_loss_fusion: true
+  # cross_entropy_fusion_impl: "te"
+  # cross_entropy_loss_fusion: true
