Create a workflow to run benchmarks #180
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# Benchmarks workflow: builds XLA's multihost HLO runner with CUDA and runs a
# sample HLO file on a GPU runner, uploading the logs and xspace dump as an
# artifact. Triggered on PRs to main, manually, and on a 6-hour schedule.
name: Benchmarks

on:
  pull_request:
    branches:
      - main
  workflow_dispatch: # Allows manual triggering
  schedule:
    - cron: '0 */6 * * *' # Run every 6 hours (at minute 0 of hours 0, 6, 12, 18)
  # workflow_dispatch:
  #   inputs:
  #     halt-for-connection:
  #       description: 'Should this workflow run wait for a remote connection?'
  #       type: choice
  #       required: true
  #       default: 'no'
  #       options:
  #         - 'yes'
  #         - 'no'

jobs:
  build-xla-gpu-and-test:
    runs-on: "linux-x86-g2-48-l4-4gpu" #linux-x86-n2-16 # Use a GPU-enabled runner
    container:
      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
      options: --gpus all --privileged # Might need privileged mode, use with caution
    steps:
      # Checks out into the workspace root; the repository itself contains an
      # xla/ subdirectory, which later steps use as their working directory.
      - name: Checkout XLA
        # v4: v3 runs on the deprecated Node 16 runtime; v4 also matches
        # actions/upload-artifact@v4 used below.
        uses: actions/checkout@v4
        with:
          repository: juliagmt-google/xla

      - name: Print machine specs
        run: |
          lscpu
          free -h # Memory information
          df -h # Disk space information
          uname -a # Kernel information

      # Results land in xla/results, matching the --xla_gpu_dump_xspace_to and
      # log-redirect paths used in the "Run an HLO file" step.
      - name: Create results directory
        working-directory: xla
        run: mkdir -p results # -p: idempotent if the directory already exists

      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # configure.py lives in the repository root, one level above the xla/
      # subdirectory — hence the cd .. from working-directory xla.
      - name: Configure XLA
        working-directory: xla
        run: |
          cd ..
          ls
          ./configure.py --backend CUDA --nccl

      - name: Set TF_CPP_MAX_VLOG_LEVEL
        working-directory: xla
        run: echo "TF_CPP_MAX_VLOG_LEVEL=1" >> $GITHUB_ENV # Use GITHUB_ENV to persist across steps

      - name: Build hlo_runner_main
        working-directory: xla
        run: bazel build -c opt --config=cuda --dynamic_mode=off //xla/tools/multihost_hlo_runner:hlo_runner_main

      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # Runs the built binary from the repository root (bazel-bin lives there),
      # capturing both stdout and stderr into the results directory.
      - name: Run an HLO file
        working-directory: xla
        run: |
          cd ..
          ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main --device_type=gpu --log_output=True --use_spmd_partitioning --xla_gpu_dump_xspace_to=xla/results/xspace.pbtxt xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo &> xla/results/gpu_hlo_backend.log

      # - name: Wait For Connection
      #   uses: google-ml-infra/actions/ci_connection@main
      #   with:
      #     halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # # - name: Download parse_xla_logs.py
      # #   working-directory: xla
      # #   run: wget https://raw.githubusercontent.com/juliagmt-google/xla/main/.github/workflows/parse_xla_logs.py

      # # - name: Parse XLA logs
      # #   working-directory: xla
      # #   run: python parse_xla_logs.py results/gpu_hlo_backend.log

      - name: Upload Results
        uses: actions/upload-artifact@v4
        with:
          name: gpu-xla-benchmarks
          path: xla/results