# Source: GitHub web UI "Workflow file for this run" page for
# PR #4164 — "[xla:gpu] Unify CUDA allocators under cuMemCreate allocator".
# Copyright 2026 The OpenXLA Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# CI ROCm: builds and tests XLA and JAX on AMD Instinct GPU runners using a
# pinned ROCm container image resolved once by the `rocm-config` job.
name: CI ROCm

permissions:
  contents: read

on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  # github.ref is fully qualified (e.g. "refs/heads/main"), so comparing
  # against the bare string 'main' was always true and allowed in-progress
  # runs on main to be cancelled as well. Compare against the full ref.
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  # Resolves the ROCm build container to a single pinned digest so both GPU
  # jobs below are guaranteed to run in the exact same image.
  rocm-config:
    runs-on: ubuntu-latest
    outputs:
      docker-image: ${{ steps.out.outputs.docker-image }}
    steps:
      - id: out
        shell: bash
        run: |
          # rocm/tensorflow-build:latest-jammy-pythonall-rocm7.2.1-ci_official
          echo "docker-image=rocm/tensorflow-build@sha256:66eb4c1e39db76fae2eb0a1029490acbe7bfce0e00d6ab435e170f743921f4c4" >> "$GITHUB_OUTPUT"

  jax:
    needs: rocm-config
    # On pull requests, only run when targeting main; always run for push /
    # manual dispatch events.
    if: ${{ github.event_name != 'pull_request' || github.base_ref == 'main' }}
    name: JAX Linux x86 AMD Instinct GPU
    runs-on: linux-x86-64-1gpu-amd
    env:
      DOCKER_IMAGE: ${{ needs.rocm-config.outputs.docker-image }}
    # Shared container config; the `xla` job reuses it via the
    # *rocm_container alias. NOTE(review): relies on YAML anchor support in
    # the Actions workflow parser — verify it is enabled for this repo.
    container: &rocm_container
      image: ${{ needs.rocm-config.outputs.docker-image }}
      volumes:
        - /data/ci-cert.crt:/data/ci-cert.crt
        - /data/ci-cert.key:/data/ci-cert.key
        - /data:/data
      options: >-
        --device=/dev/dri
        --device=/dev/kfd
        --ipc=host
        --shm-size=64G
        --cap-add=SYS_PTRACE
        --security-opt=seccomp=unconfined
        --tmpfs /root/.cache/bazel:rw,exec,size=80g
        --group-add video
    defaults:
      run:
        shell: bash
    steps:
      - name: Checking out openxla/xla
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
      - name: Checkout JAX
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          repository: jax-ml/jax
          path: jax
          persist-credentials: false
      - name: CPU Info
        run: lscpu
      - name: ROCm info
        run: /opt/rocm/bin/rocminfo
      - name: Run JAX Unit Tests
        timeout-minutes: 120
        working-directory: jax
        run: |
          ./ci/run_bazel_test_rocm_rbe.sh \
            --override_repository=xla=${GITHUB_WORKSPACE} \
            --config=single_gpu \
            --//jax:build_jaxlib=wheel \
            --//jax:build_jax=true \
            --local_test_jobs=4 \
            --action_env=JAX_ENABLE_X64=1 \
            --repo_env=HERMETIC_PYTHON_VERSION=3.14 \
            --repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE}

  xla:
    needs: rocm-config
    name: XLA Linux x86 AMD Instinct GPU
    runs-on: linux-x86-64-4gpu-amd
    env:
      DOCKER_IMAGE: ${{ needs.rocm-config.outputs.docker-image }}
      EXECUTE_CI_BUILD_URL: https://raw.githubusercontent.com/ROCm/xla/refs/heads/rocm-dev-infra/build_tools/rocm/execute_ci_build_upstream.sh
    # Reuse the jax job's container configuration (same image, devices,
    # volumes and options).
    container: *rocm_container
    defaults:
      run:
        shell: bash
    steps:
      - name: Checking out openxla/xla
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
      - name: Fetch ROCm XLA Upstream Script from ROCm/xla
        run: |
          wget -O build_tools/rocm/execute_ci_build_upstream.sh ${EXECUTE_CI_BUILD_URL}
          chmod +x build_tools/rocm/execute_ci_build_upstream.sh
      - name: CPU Info
        run: lscpu
      - name: ROCm info
        run: /opt/rocm/bin/rocminfo
      - name: Test XLA [single_gpu]
        timeout-minutes: 120
        run: |
          build_tools/rocm/execute_ci_build_upstream.sh \
            --config=rocm_ci \
            --config=rocm_rbe \
            --config=ci_single_gpu \
            --local_test_jobs=4 \
            --repo_env=TF_ROCM_RBE_SINGLE_GPU_POOL=linux_x64_gpu_gfx90a \
            --repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE}
      - name: Test XLA [multi_gpu]
        timeout-minutes: 120
        run: |
          build_tools/rocm/execute_ci_build_upstream.sh \
            --config=rocm_ci \
            --config=rocm_rbe \
            --config=ci_multi_gpu \
            --local_test_jobs=1 \
            --strategy=TestRunner=local \
            --repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE}