Skip to content

Commit 0378298

Browse files
authored
Replace deprecated flag xla_gpu_graph_level. (#1244)
The replacement is xla_gpu_enable_command_buffer: https://github.com/openxla/xla/blob/5d92a4430f26fd73593ac92657507db21d131f13/xla/debug_options_flags.cc#L1412-L1414
1 parent 9ba6461 commit 0378298

File tree

9 files changed

+9
-9
lines changed

9 files changed

+9
-9
lines changed

.github/container/test-maxtext.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ fi
233233

234234
export BASE_XLA_FLAGS=${BASE_XLA_FLAGS:---xla_gpu_enable_latency_hiding_scheduler=true
235235
--xla_gpu_enable_triton_gemm=false
236-
--xla_gpu_graph_level=0
236+
--xla_gpu_enable_command_buffer=
237237
--xla_gpu_all_reduce_combine_threshold_bytes=1073741824
238238
--xla_gpu_all_gather_combine_threshold_bytes=1073741824
239239
--xla_gpu_reduce_scatter_combine_threshold_bytes=134217728

rosetta/docs/PGLE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ In order to get the best performance with PGLE, here is a list of all recommende
6262
```
6363
export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
6464
--xla_gpu_enable_triton_gemm=false
65-
--xla_gpu_graph_level=0
65+
--xla_gpu_enable_command_buffer=
6666
--xla_gpu_all_reduce_combine_threshold_bytes=1073741824
6767
--xla_gpu_all_gather_combine_threshold_bytes=1073741824
6868
--xla_gpu_reduce_scatter_combine_threshold_bytes=1073741824

rosetta/rosetta/projects/maxtext/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ The [GPU Performance document](../../../docs/GPU_performance.md) provides a deta
6969
```
7070
XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
7171
--xla_gpu_enable_triton_gemm=false
72-
--xla_gpu_graph_level=0
72+
--xla_gpu_enable_command_buffer=
7373
--xla_gpu_all_reduce_combine_threshold_bytes=1073741824
7474
--xla_gpu_all_gather_combine_threshold_bytes=1073741824
7575
--xla_gpu_reduce_scatter_combine_threshold_bytes=134217728

rosetta/rosetta/projects/maxtext/scripts/example_slurm.sub

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ export NCCL_IB_SL=1
5454
# Set XLA Flags
5555
export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
5656
--xla_gpu_enable_triton_gemm=false
57-
--xla_gpu_graph_level=0
57+
--xla_gpu_enable_command_buffer=
5858
--xla_gpu_all_reduce_combine_threshold_bytes=1073741824
5959
--xla_gpu_all_gather_combine_threshold_bytes=1073741824
6060
--xla_gpu_reduce_scatter_combine_threshold_bytes=134217728

rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ THRESHOLD_BYTES=1073741824
55
export XLA_FLAGS="\
66
--xla_gpu_enable_latency_hiding_scheduler=true \
77
--xla_gpu_enable_triton_gemm=false \
8-
--xla_gpu_graph_level=0 \
8+
--xla_gpu_enable_command_buffer= \
99
--xla_gpu_enable_highest_priority_async_stream=true \
1010
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
1111
--xla_gpu_all_gather_combine_threshold_bytes=$((THRESHOLD_BYTES/(NUM_NODES*NUM_GPUS))) \

rosetta/rosetta/projects/pax/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ For the the 126M model, we recommend setting `--xla_gpu_all_reduce_combine_thres
141141
BASE_XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
142142
--xla_gpu_enable_triton_gemm=false
143143
--xla_gpu_all_reduce_combine_threshold_bytes=33554432
144-
--xla_gpu_graph_level=0" bash run_pile_multinode.sh ...
144+
--xla_gpu_enable_command_buffer=" bash run_pile_multinode.sh ...
145145
```
146146

147147
# Configs

rosetta/rosetta/projects/pax/xla_flags/common.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export XLA_FLAGS="\
66
--xla_gpu_enable_highest_priority_async_stream=true \
77
--xla_gpu_enable_triton_softmax_fusion=false \
88
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
9-
--xla_gpu_graph_level=0 \
9+
--xla_gpu_enable_command_buffer= \
1010
"
1111
export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
1212
unset THRESHOLD_BYTES

rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export XLA_FLAGS="\
66
--xla_gpu_enable_highest_priority_async_stream=true \
77
--xla_gpu_enable_triton_softmax_fusion=false \
88
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
9-
--xla_gpu_graph_level=0 \
9+
--xla_gpu_enable_command_buffer= \
1010
--xla_gpu_enable_cudnn_fmha=false \
1111
"
1212
export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8

rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export XLA_FLAGS="\
88
--xla_gpu_enable_highest_priority_async_stream=true \
99
--xla_gpu_enable_triton_softmax_fusion=false \
1010
--xla_gpu_all_reduce_combine_threshold_bytes=${ALL_REDUCE_THRESHOLD_BYTES} \
11-
--xla_gpu_graph_level=0 \
11+
--xla_gpu_enable_command_buffer= \
1212
--xla_gpu_all_gather_combine_threshold_bytes=${ALL_GATHER_THRESHOLD_BYTES} \
1313
--xla_gpu_reduce_scatter_combine_threshold_bytes=${REDUCE_SCATTER_THRESHOLD_BYTES} \
1414
--xla_gpu_enable_pipelined_all_gather=true \

0 commit comments

Comments (0)