@@ -71,38 +71,68 @@ jobs:
7171 with :
7272 repository : openxla/xla # Replace with your fork if needed
7373 path : xla
74-
75- # - name: Build XLA with GPU support
76- # working-directory: xla
77- # continue-on-error: true
78- # run: bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
79-
80- # - name: Run XLA tests with GPU
81- # working-directory: xla
82- # continue-on-error: true
83- # run: bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
74+
75+ - name : Configure XLA
76+ working-directory : xla
77+ run : ./configure.py --backend CUDA --nccl
8478
85- # - name: Run Profile Analysis
86- # working-directory: xla
87- # continue-on-error: true
88- # run: bazel analyze-profile profile.json.gz
79+ - name : Set TF_CPP_MAX_VLOG_LEVEL
80+ working-directory : xla
81+ run : echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV
8982
90- # - name: Get GPU spec
91- # working-directory: xla
92- # continue-on-error: true
93- # run: nvidia-smi
9483 - name : Wait For Connection
9584 uses : google-ml-infra/actions/ci_connection@main
9685 with :
9786 halt-dispatch-input : ${{ inputs.halt-for-connection }}
98-
99- - name : Configure XLA
87+
88+ - name : Build XLA with GPU support with RBE
10089 working-directory : xla
101- run : ./configure.py --backend CUDA --nccl
90+ continue-on-error : true
91+ run : bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
92+
93+ - name : Run XLA tests with GPU support with RBE
94+ working-directory : xla
95+ continue-on-error : true
96+ run : bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
10297
103- - name : Set TF_CPP_MAX_VLOG_LEVEL
98+ - name : Run Profile Analysis
10499 working-directory : xla
105- run : echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV
100+ continue-on-error : true
101+ run : bazel analyze-profile profile.json.gz
102+
103+ - name : Get GPU spec
104+ working-directory : xla
105+ continue-on-error : true
106+ run : nvidia-smi
107+
108+ - name : Build run_hlo_module with GPU
109+ working-directory : xla
110+ run : bazel build -c opt --dynamic_mode=off xla/tools:run_hlo_module --config=cuda
111+
112+ - name : Run HLO Module Benchmarks with GPU in xla/tests/fuzz
113+ working-directory : xla
114+ continue-on-error : true
115+ run : |
116+ for file in xla/tests/fuzz/*.hlo; do
117+ filename=$(basename "$file")
118+ # Skip expected failed hlo files.
119+ if [[ "$filename" == "rand_000060.hlo" || "$filename" == "rand_000067.hlo" || "$filename" == "rand_000072.hlo" ]]; then
120+ echo "Skipping benchmark on $file"
121+ continue
122+ fi
123+ echo "Running benchmark on $file"
124+ ./bazel-bin/xla/tools/run_hlo_module --input_format=hlo --platform=GPU "$file"
125+ done
126+
127+ - name : Run HLO Module Benchmarks with GPU in xla/service/gpu
128+ working-directory : xla
129+ continue-on-error : true
130+ run : |
131+ find xla/service/gpu -name "*.hlo" -print0 | while IFS= read -r -d $'\0' hlo_file; do
132+ echo "Running: $hlo_file"
133+ ./bazel-bin/xla/tools/run_hlo_module --input_format=hlo --platform=GPU "$hlo_file"
134+ echo "--------------------------------------------------"
135+ done
106136
107137 - name : Build hlo_runner_main
108138 working-directory : xla
0 commit comments