Create a workflow to run benchmarks #212
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Benchmarks

on:
  pull_request:
    branches:
      - main
  # schedule:
  #   - cron: '0 */6 * * *'  # Alternative trigger: run every 6 hours.
  workflow_dispatch:
    inputs:
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: choice
        required: true
        default: 'no'
        options:
          - 'yes'
          - 'no'

jobs:
  build-xla-gpu-and-test:
    # GPU-enabled runner (4x NVIDIA L4); alternative CPU runner: linux-x86-n2-16.
    runs-on: "linux-x86-g2-48-l4-4gpu"
    container:
      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
      # --privileged grants broad host access; kept because GPU passthrough on
      # this runner appears to need it — use with caution.
      options: --gpus all --privileged
    steps:
      - name: Checkout XLA
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          repository: openxla/xla

      - name: Print machine specs
        run: |
          nvidia-smi
          free -h   # Memory information
          df -h     # Disk space information
          uname -a  # Kernel information

      - name: Create results directory
        # -p makes the step idempotent on re-runs.
        run: mkdir -p results

      - name: Configure XLA for GPU backend
        run: ./configure.py --backend CUDA --nccl

      - name: Set TF_CPP_MAX_VLOG_LEVEL
        # A step-level `env:` only applies to that single step; write to
        # GITHUB_ENV so the variable is visible to the build/run steps below.
        run: |
          echo "TF_CPP_MAX_VLOG_LEVEL=1" >> "$GITHUB_ENV"
          echo "TF_CPP_MAX_VLOG_LEVEL set to 1"

      - name: Build hlo_runner_main
        run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main

      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # TODO(juliagmt): Add more performance-critical HLOs to benchmark.
      - name: Run xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo
        run: >-
          ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main
          --device_type=gpu --log_output=True --use_spmd_partitioning
          --xla_gpu_dump_xspace_to=results/xspace.pb
          xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo

      - name: Compute the cost of gpu_hlo_pass.hlo
        # Do not assign to PWD (reserved shell variable); use a local name.
        run: |
          workspace_dir=$(pwd)
          bazel run //xla/tools:compute_cost -- --input="$workspace_dir/xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo" --format=hlo --gpu

      - name: Set XSPACE_PATH variable
        # `$PWD` is NOT expanded inside a step-level `env:` mapping, and step
        # env does not persist across steps anyway. Persist an absolute path
        # via GITHUB_ENV so the get_device_stats_main step (which runs from a
        # different working directory) can find the dump.
        run: |
          echo "XSPACE_PATH=$GITHUB_WORKSPACE/results/xspace.pb" >> "$GITHUB_ENV"
          echo "XSPACE_PATH is: $GITHUB_WORKSPACE/results/xspace.pb"

      - name: Checkout juliagmt-google/xla
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          repository: juliagmt-google/xla
          path: juliagmt-google-xla

      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}

      - name: Run get_device_stats_main
        working-directory: juliagmt-google-xla
        run: bazel run //xla/tools:get_device_stats_main -- --input="$XSPACE_PATH"

      - name: Upload Artifacts
        # NOTE(review): SHA pin without a version comment — confirm which
        # upload-artifact v4.x tag this SHA corresponds to.
        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: gpu-xla-benchmarks
          path: results
# --- Legacy, commented-out variants kept for reference -----------------------
# (1) Earlier single-job layout that checked out XLA into an `xla/` subdirectory.
# - name: Print machine specs
#   run: |
#     lscpu
#     free -h   # Memory information
#     df -h     # Disk space information
#     uname -a  # Kernel information
# - name: Create results directory
#   working-directory: xla
#   run: mkdir results
# - name: Wait For Connection
#   uses: google-ml-infra/actions/ci_connection@main
#   with:
#     halt-dispatch-input: ${{ inputs.halt-for-connection }}
# - name: Configure XLA
#   working-directory: xla
#   run: |
#     cd ..
#     ls
#     ./configure.py --backend CUDA --nccl
# - name: Set TF_CPP_MAX_VLOG_LEVEL
#   working-directory: xla
#   run: echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV  # Use GITHUB_ENV to persist across steps
# - name: Build hlo_runner_main
#   working-directory: xla
#   run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main
# - name: Run an HLO file
#   working-directory: xla
#   run: |
#     cd ..
#     ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --log_output=True --use_spmd_partitioning --xla_gpu_dump_xspace_to=xla/results/xspace.pb xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo
# - name: Get Device Stats
#   working-directory: xla
#   run: |
#     cd ..
#     bazel run //xla/tools:get_device_stats_main -- --input=$(pwd)/xla/results/xspace.pb
# - name: Download parse_xla_logs.py
#   working-directory: xla
#   run: wget https://raw.githubusercontent.com/juliagmt-google/xla/main/.github/workflows/parse_xla_logs.py
# - name: Parse XLA logs
#   working-directory: xla
#   run: python parse_xla_logs.py results/gpu_hlo_backend.log
# - name: Upload Results
#   uses: actions/upload-artifact@v4
#   with:
#     name: gpu-xla-benchmarks
#     path: xla/results
#
# (2) Alternative matrix-based job design, for when more GPU types are added.
# jobs:
#   Tests:
#     strategy:
#       # Don't fail fast - want to see results for all builds even if one fails.
#       fail-fast: false
#       matrix:
#         job_info:
#           - os: "linux-x86-g2-48-l4-4gpu"
#             container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
#             pretty_name: "Linux X86 runner with 4 NVIDIA L4 GPUs"
#         # Expect more GPU types in the future.
#     name: ${{ matrix.job_info.pretty_name }}
#     runs-on: ${{ matrix.job_info.os }}
#     container: ${{ matrix.job_info.container }}
#     defaults:
#       run:
#         shell: bash
#     timeout-minutes: 360
#     steps:
#       - name: Checkout XLA
#         uses: actions/checkout@v4 # v4.1.1
#         with:
#           repository: openxla/xla
#       - name: Wait For Connection
#         uses: google-ml-infra/actions/ci_connection@main
#         with:
#           halt-dispatch-input: ${{ inputs.halt-for-connection }}
#       - name: Print machine specs
#         run: |
#           nvidia-smi
#           free -h   # Memory information
#           df -h     # Disk space information
#           uname -a  # Kernel information
#       - name: Create results directory
#         run: mkdir results
#       - name: Configure XLA for GPU backend
#         run: ./configure.py --backend CUDA --nccl
#       - name: Set TF_CPP_MAX_VLOG_LEVEL
#         env:
#           TF_CPP_MAX_VLOG_LEVEL: 1
#         run: |
#           echo "TF_CPP_MAX_VLOG_LEVEL is: $TF_CPP_MAX_VLOG_LEVEL"
#       - name: Build hlo_runner_main
#         run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main
#       # TODO(juliagmt): Add more performance-critical HLOs to benchmark.
#       - name: Run xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo
#         run: ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --log_output=True --use_spmd_partitioning --xla_gpu_dump_xspace_to=xla/results/xspace.pbtxt xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo
#       - name: Upload XSpace
#         uses: actions/upload-artifact@v4 # v4.1.1
#         with:
#           name: gpu-xla-benchmarks-xspace
#           path: xla/results/xspace.pbtxt