Skip to content

File tree

5 files changed

+1
-5
lines changed

5 files changed

+1
-5
lines changed
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1 1
# These XLA flags are meant to be used with the JAX version in the imagen container
2-
export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=false --xla_gpu_enable_async_all_gather=false --xla_gpu_enable_async_reduce_scatter=false --xla_gpu_enable_triton_gemm=false --xla_gpu_cuda_graph_level=0 --xla_gpu_enable_triton_softmax_fusion=false --xla_gpu_enable_async_all_reduce=false ${XLA_FLAGS}"
2+
export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=false --xla_gpu_enable_async_all_gather=false --xla_gpu_enable_async_reduce_scatter=false --xla_gpu_enable_triton_gemm=false --xla_gpu_cuda_graph_level=0 --xla_gpu_enable_async_all_reduce=false ${XLA_FLAGS}"

rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ export XLA_FLAGS="\
14 14
--xla_gpu_enable_pipelined_reduce_scatter=true \
15 15
--xla_gpu_enable_pipelined_all_reduce=true \
16 16
--xla_gpu_enable_while_loop_double_buffering=true \
17-
--xla_gpu_enable_triton_softmax_fusion=false \
18 17
--xla_gpu_enable_all_gather_combine_by_dim=false \
19 18
--xla_gpu_enable_reduce_scatter_combine_by_dim=false \
20 19
--xla_disable_hlo_passes=rematerialization \

rosetta/rosetta/projects/pax/xla_flags/common.env

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ export XLA_FLAGS="\
4 4
--xla_gpu_enable_latency_hiding_scheduler=true \
5 5
--xla_allow_excess_precision \
6 6
--xla_gpu_enable_highest_priority_async_stream=true \
7-
--xla_gpu_enable_triton_softmax_fusion=false \
8 7
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
9 8
--xla_gpu_graph_level=0 \
10 9
"

rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ export XLA_FLAGS="\
4 4
--xla_gpu_enable_latency_hiding_scheduler=true \
5 5
--xla_allow_excess_precision \
6 6
--xla_gpu_enable_highest_priority_async_stream=true \
7-
--xla_gpu_enable_triton_softmax_fusion=false \
8 7
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
9 8
--xla_gpu_graph_level=0 \
10 9
--xla_gpu_enable_cudnn_fmha=false \

rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ export XLA_FLAGS="\
6 6
--xla_gpu_enable_latency_hiding_scheduler=true \
7 7
--xla_allow_excess_precision \
8 8
--xla_gpu_enable_highest_priority_async_stream=true \
9-
--xla_gpu_enable_triton_softmax_fusion=false \
10 9
--xla_gpu_all_reduce_combine_threshold_bytes=${ALL_REDUCE_THRESHOLD_BYTES} \
11 10
--xla_gpu_graph_level=0 \
12 11
--xla_gpu_all_gather_combine_threshold_bytes=${ALL_GATHER_THRESHOLD_BYTES} \

0 commit comments

Comments
 (0)