
Commit a1550fc

Update benchmarks.yml
1 parent 2222254 commit a1550fc


.github/workflows/benchmarks.yml

Lines changed: 112 additions & 15 deletions
@@ -81,29 +81,126 @@ jobs:
       - name: Build hlo_runner_main
         working-directory: xla
         run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main
+name: Benchmarks
 
-      # - name: Wait For Connection
-      #   uses: google-ml-infra/actions/ci_connection@main
-      #   with:
-      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}
+on:
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+    inputs:
+      halt-for-connection:
+        description: 'Should this workflow run wait for a remote connection?'
+        type: choice
+        required: true
+        default: 'no'
+        options:
+          - 'yes'
+          - 'no'
+
+jobs:
+  build-xla-gpu-and-test:
+    runs-on: linux-x86-g2-48-l4-4gpu
+    container:
+      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
+      options: --gpus all --privileged
+
+    steps:
+      - name: Checkout XLA
+        uses: actions/checkout@v3
+        with:
+          repository: openxla/xla
+          path: xla
+
+      - name: Create results directory
+        working-directory: xla
+        run: mkdir -p results
 
-      - name: Run HLO Module Benchmarks withg GPU in xla/tests/fuzz
+      - name: Get GPU spec
         working-directory: xla
         continue-on-error: true
+        run: nvidia-smi
+
+      - name: Configure XLA
+        working-directory: xla
+        run: ./configure.py --backend CUDA --nccl
+
+      - name: Set TF_CPP_MAX_VLOG_LEVEL
+        working-directory: xla
+        run: echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV # Use GITHUB_ENV to persist across steps
+
+      - name: Check TF_CPP_MAX_VLOG_LEVEL
+        working-directory: xla
+        run: echo "$TF_CPP_MAX_VLOG_LEVEL"
+
+      - name: Build hlo_runner_main
+        working-directory: xla
+        run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main
+
+      - name: Create b284431534_transpose_convert_f32_s8.hlo
+        working-directory: xla
         run: |
-          for file in xla/tests/fuzz/*.hlo; do
-            filename=$(basename "$file")
-            # Skip expected failed hlo files.
-            if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
-              echo "Skipping benchmark on $file"
-              continue
-            fi
-            echo "Running benchmark on $file" &> results/"$filename".log
-            ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning "$file" &> results/"$filename".log
-          done
+          cat << EOF > b284431534_transpose_convert_f32_s8.hlo
+          HloModule test, entry_computation_layout={(f32[1,4,32,192,384]{4,3,2,1,0})->s8[1,4,192,384,32]{4,3,2,1,0}}
+
+          fusion {
+            param_0 = f32[1,4,32,192,384] parameter(0)
+            transpose = f32[1,4,192,384,32] transpose(param_0), dimensions={0,1,3,4,2}
+            ROOT convert = s8[1,4,192,384,32] convert(transpose)
+          }
+
+          ENTRY main {
+            param_0 = f32[1,4,32,192,384] parameter(0)
+            ROOT fusion = s8[1,4,192,384,32] fusion(param_0), kind=kInput, calls=fusion
+          }
+          EOF
+
+      - name: Run specific HLO file
+        working-directory: xla
+        run: ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning b284431534_transpose_convert_f32_s8.hlo &> results/b284431534_transpose_convert_f32_s8.hlo.log
+
+      # - name: Run HLO Module Benchmarks with GPU in xla/tests/fuzz
+      #   working-directory: xla
+      #   continue-on-error: true
+      #   run: |
+      #     for file in xla/tests/fuzz/*.hlo; do
+      #       filename=$(basename "$file")
+      #       # Skip expected failed hlo files.
+      #       if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
+      #         echo "Skipping benchmark on $file"
+      #         continue
+      #       fi
+      #       echo "Running benchmark on $file"
+      #       ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning "$file" &> results/"$filename".log
+      #     done
 
       - name: Upload Results
         uses: actions/upload-artifact@v4
         with:
           name: gpu-xla-benchmarks
           path: xla/results
+      # - name: Wait For Connection
+      #   uses: google-ml-infra/actions/ci_connection@main
+      #   with:
+      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}
+
+      # - name: Run HLO Module Benchmarks withg GPU in xla/tests/fuzz
+      #   working-directory: xla
+      #   continue-on-error: true
+      #   run: |
+      #     for file in xla/tests/fuzz/*.hlo; do
+      #       filename=$(basename "$file")
+      #       # Skip expected failed hlo files.
+      #       if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
+      #         echo "Skipping benchmark on $file"
+      #         continue
+      #       fi
+      #       echo "Running benchmark on $file" &> results/"$filename".log
+      #       ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --use_spmd_partitioning "$file" &> results/"$filename".log
+      #     done
+
+      # - name: Upload Results
+      #   uses: actions/upload-artifact@v4
+      #   with:
+      #     name: gpu-xla-benchmarks
+      #     path: xla/results
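The new top-level `on:` block gives the added workflow body two triggers: pull requests targeting main, and a manual `workflow_dispatch` with a yes/no `halt-for-connection` choice input. As a rough sketch of exercising the manual path, assuming the file keeps the name benchmarks.yml and the GitHub CLI is authenticated against the repository:

```bash
# Manually dispatch the workflow and request the debug-connection path.
# (Sketch: the workflow file name and input key are taken from the diff above.)
gh workflow run benchmarks.yml -f halt-for-connection=yes
```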

0 commit comments
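The `Set TF_CPP_MAX_VLOG_LEVEL` step uses the standard GitHub Actions mechanism for carrying environment variables across steps: each `run:` step gets a fresh shell, so a plain `export` dies with the step, while `KEY=VALUE` lines appended to the file named by `$GITHUB_ENV` are exported into every subsequent step, which is what the `Check TF_CPP_MAX_VLOG_LEVEL` step then verifies. A minimal sketch of the difference:

```bash
# Inside a single run: step.
export ONLY_THIS_STEP=1   # visible in this step's shell only

# Appended KEY=VALUE lines in $GITHUB_ENV persist to all later steps
# of the same job (standard GitHub Actions behavior).
echo "TF_CPP_MAX_VLOG_LEVEL=1" >> "$GITHUB_ENV"
```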

Comments
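The new single-HLO steps translate directly into a local reproduction. A minimal sketch, with every command lifted from the workflow above; it assumes an openxla/xla checkout as the working directory, a CUDA-capable GPU, and a working bazel setup:

```bash
#!/usr/bin/env bash
# Local repro of the Configure/Build/Create/Run steps (sketch; commands are
# copied from the workflow, not an officially supported script).
set -euo pipefail

./configure.py --backend CUDA --nccl
bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main

mkdir -p results
cat << 'EOF' > b284431534_transpose_convert_f32_s8.hlo
HloModule test, entry_computation_layout={(f32[1,4,32,192,384]{4,3,2,1,0})->s8[1,4,192,384,32]{4,3,2,1,0}}

fusion {
  param_0 = f32[1,4,32,192,384] parameter(0)
  transpose = f32[1,4,192,384,32] transpose(param_0), dimensions={0,1,3,4,2}
  ROOT convert = s8[1,4,192,384,32] convert(transpose)
}

ENTRY main {
  param_0 = f32[1,4,32,192,384] parameter(0)
  ROOT fusion = s8[1,4,192,384,32] fusion(param_0), kind=kInput, calls=fusion
}
EOF

# &> captures both stdout and stderr into the log, matching the workflow step.
./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main \
  --device_type=gpu --use_spmd_partitioning \
  b284431534_transpose_convert_f32_s8.hlo &> results/b284431534_transpose_convert_f32_s8.hlo.log
```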
 (0)
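One detail the deleted fuzz loop highlights: `&>` truncates its target, so the old `echo "Running benchmark on $file" &> results/"$filename".log` wrote a banner that the runner's own `&>` redirect on the next line immediately overwrote, and the banner never reached the uploaded artifact. The re-added commented-out loop drops the redirect from the echo instead. If both lines should land in the same log, appending is the usual fix; a hypothetical variant of the loop body:

```bash
# Hypothetical variant: &>> appends stdout and stderr instead of
# truncating, so the banner and the runner output share one log file.
echo "Running benchmark on $file" &>> "results/${filename}.log"
./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main \
  --device_type=gpu --use_spmd_partitioning "$file" &>> "results/${filename}.log"
```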