Create a workflow to run benchmarks #7
Workflow file for this run
name: Benchmarks

on:
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: choice
        required: true
        default: 'no'
        options:
          - 'yes'
          - 'no'
jobs:
  build:
    strategy:
      matrix:
        runner: ["linux-x86-g2-48-l4-4gpu"]
    runs-on: ${{ matrix.runner }}
    container:
      image: "gcr.io/tensorflow-testing/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest"
    env:
      JAXCI_HERMETIC_PYTHON_VERSION: 3.11
    steps:
      - name: Checkout JAX Fork
        uses: actions/checkout@v3
        with:
          repository: 'google-ml-infra/jax-fork'
          path: jax-fork
      - name: Install JAX Dependencies
        working-directory: jax-fork
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          pip install absl-py
          pip install "jax[cuda12_pip]" # Adjust CUDA version if needed
          pip install google-benchmark
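          # Optional sanity check (an assumption, not one of the original steps): `python -c "import jax; print(jax.devices())"`
          # should report CUDA devices rather than falling back to CPU.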
      - name: Run JAX Multiprocess GPU Test
        working-directory: jax-fork
        continue-on-error: true
        run: python -m pytest tests/multiprocess_gpu_test.py
      # - name: Build XLA GPU Atomic Test
      #   working-directory: xla
      #   continue-on-error: true
      #   run: bazel build -c opt --config=cuda //xla/service/gpu/tests:gpu_atomic_test
      # - name: Run XLA GPU Atomic Test
      #   working-directory: xla
      #   continue-on-error: true
      #   run: bazel test -c opt --config=cuda //xla/service/gpu/tests:gpu_atomic_test
  xla-gpu-ci:
    runs-on: linux-x86-g2-48-l4-4gpu
    steps:
      - name: Checkout XLA
        uses: actions/checkout@v3
        with:
          repository: 'openxla/xla'
          path: xla
      - name: Pull Docker image (with parallel)
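        # GNU parallel with --nonall runs the pull once, retrying up to 3 times with a 15 s delay to smooth over transient registry errors.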
        run: parallel --ungroup --retries 3 --delay 15 --nonall -- docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest
      - name: Run Docker container for build and test
        run: |
          docker run --detach --name=xla_ci --rm --interactive --tty --volume=./xla:/github/xla --workdir=/github/xla us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest bash
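          # First pass: --nobuild means Bazel only loads and analyzes the filtered targets, surfacing configuration errors before anything is compiled.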
          docker exec xla_ci parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
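          # Second pass: build and run the same targets; --profile=profile.json.gz records a trace that analyze-profile summarizes below.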
          docker exec xla_ci bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/...
          docker exec xla_ci bazel analyze-profile profile.json.gz
          docker stop xla_ci
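For a manual run, the halt-for-connection input can be supplied through the GitHub CLI. A minimal sketch, assuming the workflow is saved as .github/workflows/benchmarks.yml (the filename is not stated in this PR):

    gh workflow run benchmarks.yml -f halt-for-connection=yes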