Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion buildlib/az-distro-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ jobs:
# CUDA 13 build variants of the release matrix, one entry per distro.
# artifact_name is handed to the "Build RPM package" step as AZ_ARTIFACT_NAME,
# and that step's condition matches on the substrings 'centos' / 'rocky' in it.
# build_container is presumably the name of a container resource declared by
# the pipeline that includes this template - verify against the callers.
# NOTE(review): this ubuntu24 artifact is labelled mofed5, while the
# ubuntu24_cuda13 container images used by the release pipelines are tagged
# mofed24.10 - confirm the label is intentional.
ubuntu24_cuda13_${{ parameters.arch }}:
build_container: ubuntu24_cuda13_${{ parameters.arch }}
artifact_name: $(POSTFIX)-ubuntu24.04-mofed5-cuda13-${{ parameters.arch }}.tar.bz2
# Rocky Linux 8: 'rocky' in artifact_name enables the RPM packaging step.
rocky8_cuda13_${{ parameters.arch }}:
build_container: rocky8_cuda13_${{ parameters.arch }}
artifact_name: $(POSTFIX)-rocky8-mofed24.10-cuda13-${{ parameters.arch }}.tar.bz2
# Rocky Linux 9: same layout as the Rocky 8 entry.
rocky9_cuda13_${{ parameters.arch }}:
build_container: rocky9_cuda13_${{ parameters.arch }}
artifact_name: $(POSTFIX)-rocky9-mofed24.10-cuda13-${{ parameters.arch }}.tar.bz2
# x86 only
${{ if eq(parameters.arch, 'x86_64') }}:
centos7_cuda11_${{ parameters.arch }}:
Expand Down Expand Up @@ -84,7 +90,7 @@ jobs:
cd ../../..
tar -tjf "${AZ_ARTIFACT_NAME}"
displayName: Build RPM package
condition: and(succeeded(), contains(variables['artifact_name'], 'centos'))
condition: and(succeeded(), or(contains(variables['artifact_name'], 'centos'), contains(variables['artifact_name'], 'rocky')))
env:
AZ_ARTIFACT_NAME: $(artifact_name)

Expand Down
12 changes: 12 additions & 0 deletions buildlib/azure-pipelines-release-drp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ resources:
image: $(REPO_MIRROR)/ucx/x86_64/ubuntu22.04-mofed5-cuda13:1
# CUDA 13 release containers for x86_64, pulled from $(REPO_MIRROR).
- container: ubuntu24_cuda13_x86_64
image: $(REPO_MIRROR)/ucx/x86_64/ubuntu24.04-mofed24.10-cuda13:1
# Rocky Linux 8/9 images; tag :1 is the tag docker-compose-x86_64.yml assigns
# to the rocky*-mofed24.10-cuda13 images.
# Unlike the ubuntu entry above, the rocky containers are started with
# $(DOCKER_OPT_VOLUMES); its value is defined outside this hunk.
- container: rocky8_cuda13_x86_64
image: $(REPO_MIRROR)/ucx/x86_64/rocky8-mofed24.10-cuda13:1
options: $(DOCKER_OPT_VOLUMES)
- container: rocky9_cuda13_x86_64
image: $(REPO_MIRROR)/ucx/x86_64/rocky9-mofed24.10-cuda13:1
options: $(DOCKER_OPT_VOLUMES)

# aarch64
- container: centos8_cuda11_aarch64
Expand All @@ -69,6 +75,12 @@ resources:
image: $(REPO_MIRROR)/ucx/aarch64/ubuntu22.04-mofed5-cuda13:1
# CUDA 13 release containers for aarch64, pulled from $(REPO_MIRROR).
- container: ubuntu24_cuda13_aarch64
image: $(REPO_MIRROR)/ucx/aarch64/ubuntu24.04-mofed24.10-cuda13:1
# Rocky Linux 8/9 images; tag :2 is the tag docker-compose-aarch64.yml assigns
# to the rocky*-mofed24.10-cuda13 images (the x86_64 images use :1).
# The rocky containers are started with $(DOCKER_OPT_VOLUMES); its value is
# defined outside this hunk.
- container: rocky8_cuda13_aarch64
image: $(REPO_MIRROR)/ucx/aarch64/rocky8-mofed24.10-cuda13:2
options: $(DOCKER_OPT_VOLUMES)
- container: rocky9_cuda13_aarch64
image: $(REPO_MIRROR)/ucx/aarch64/rocky9-mofed24.10-cuda13:2
options: $(DOCKER_OPT_VOLUMES)

stages:
- stage: Prepare
Expand Down
12 changes: 12 additions & 0 deletions buildlib/azure-pipelines-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ resources:
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda13:1
# CUDA 13 release containers for x86_64. This pipeline addresses the registry
# host directly instead of going through a $(REPO_MIRROR) variable.
- container: ubuntu24_cuda13_x86_64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu24.04-mofed24.10-cuda13:1
# Rocky Linux 8/9 images; tag :1 is the tag docker-compose-x86_64.yml assigns
# to the rocky*-mofed24.10-cuda13 images.
# The rocky containers are started with $(DOCKER_OPT_VOLUMES); its value is
# defined outside this hunk.
- container: rocky8_cuda13_x86_64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/rocky8-mofed24.10-cuda13:1
options: $(DOCKER_OPT_VOLUMES)
- container: rocky9_cuda13_x86_64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/rocky9-mofed24.10-cuda13:1
options: $(DOCKER_OPT_VOLUMES)

# aarch64
- container: centos8_cuda11_aarch64
Expand All @@ -65,6 +71,12 @@ resources:
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/ubuntu22.04-mofed5-cuda13:1
# CUDA 13 release containers for aarch64. This pipeline addresses the registry
# host directly instead of going through a $(REPO_MIRROR) variable.
- container: ubuntu24_cuda13_aarch64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/ubuntu24.04-mofed24.10-cuda13:1
# Rocky Linux 8/9 images; tag :2 is the tag docker-compose-aarch64.yml assigns
# to the rocky*-mofed24.10-cuda13 images (the x86_64 images use :1).
# The rocky containers are started with $(DOCKER_OPT_VOLUMES); its value is
# defined outside this hunk.
- container: rocky8_cuda13_aarch64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/rocky8-mofed24.10-cuda13:2
options: $(DOCKER_OPT_VOLUMES)
- container: rocky9_cuda13_aarch64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/rocky9-mofed24.10-cuda13:2
options: $(DOCKER_OPT_VOLUMES)

stages:
- stage: Prepare
Expand Down
26 changes: 26 additions & 0 deletions buildlib/dockers/docker-compose-aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,29 @@ services:
CUDA_VERSION: 13.0.0
NV_DRIVER_VERSION: 580
ARCH: aarch64
# Rocky Linux 8 / CUDA 13 release image for aarch64, built from
# rocky-release.Dockerfile.
# NOTE(review): the service key says mofed5 while the image name and
# MOFED_VERSION are 24.10 - confirm the key is meant to differ.
rocky8-mofed5-cuda13:
# Tag :2 is the tag the aarch64 release pipelines reference for this image.
image: rocky8-mofed24.10-cuda13:2
build:
context: .
network: host
dockerfile: rocky-release.Dockerfile
args:
# MLNX_OFED release that the Dockerfile downloads and installs.
MOFED_VERSION: 24.10-3.2.5.0
# Distro string in the MLNX_OFED tarball name; the Dockerfile also passes it
# to mlnxofedinstall --distro.
MOFED_OS: rhel8.9
# Selects the nvidia/cuda:<CUDA_VERSION>-devel-rockylinux8 base image.
OS_VERSION: 8
CUDA_VERSION: 13.0.0
# NOTE(review): rocky-release.Dockerfile declares no ARG NV_DRIVER_VERSION, so
# this value appears to be unused by the build - confirm.
NV_DRIVER_VERSION: 580
# Architecture string in the MLNX_OFED tarball name.
ARCH: aarch64
# Rocky Linux 9 / CUDA 13 release image for aarch64, built from
# rocky-release.Dockerfile.
# NOTE(review): the service key says mofed5 while the image name and
# MOFED_VERSION are 24.10 - confirm the key is meant to differ.
rocky9-mofed5-cuda13:
# Tag :2 is the tag the aarch64 release pipelines reference for this image.
image: rocky9-mofed24.10-cuda13:2
build:
context: .
network: host
dockerfile: rocky-release.Dockerfile
args:
# MLNX_OFED release that the Dockerfile downloads and installs.
MOFED_VERSION: 24.10-3.2.5.0
# Distro string in the MLNX_OFED tarball name; the Dockerfile also passes it
# to mlnxofedinstall --distro.
MOFED_OS: rhel9.6
# Selects the nvidia/cuda:<CUDA_VERSION>-devel-rockylinux9 base image.
OS_VERSION: 9
CUDA_VERSION: 13.0.0
# NOTE(review): rocky-release.Dockerfile declares no ARG NV_DRIVER_VERSION, so
# this value appears to be unused by the build - confirm.
NV_DRIVER_VERSION: 580
# Architecture string in the MLNX_OFED tarball name.
ARCH: aarch64
26 changes: 26 additions & 0 deletions buildlib/dockers/docker-compose-x86_64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,29 @@ services:
CUDA_VERSION: 13.0.0
NV_DRIVER_VERSION: 580
ARCH: x86_64
# Rocky Linux 8 / CUDA 13 release image for x86_64, built from
# rocky-release.Dockerfile.
# NOTE(review): the service key says mofed5 while the image name and
# MOFED_VERSION are 24.10 - confirm the key is meant to differ.
rocky8-mofed5-cuda13:
# Tag :1 is the tag the x86_64 release pipelines reference for this image.
image: rocky8-mofed24.10-cuda13:1
build:
context: .
network: host
dockerfile: rocky-release.Dockerfile
args:
# MLNX_OFED release that the Dockerfile downloads and installs.
MOFED_VERSION: 24.10-3.2.5.0
# Distro string in the MLNX_OFED tarball name; the Dockerfile also passes it
# to mlnxofedinstall --distro.
MOFED_OS: rhel8.9
# Selects the nvidia/cuda:<CUDA_VERSION>-devel-rockylinux8 base image.
OS_VERSION: 8
CUDA_VERSION: 13.0.0
# NOTE(review): rocky-release.Dockerfile declares no ARG NV_DRIVER_VERSION, so
# this value appears to be unused by the build - confirm.
NV_DRIVER_VERSION: 580
# Architecture string in the MLNX_OFED tarball name.
ARCH: x86_64
# Rocky Linux 9 / CUDA 13 release image for x86_64, built from
# rocky-release.Dockerfile.
# NOTE(review): the service key says mofed5 while the image name and
# MOFED_VERSION are 24.10 - confirm the key is meant to differ.
rocky9-mofed5-cuda13:
# Tag :1 is the tag the x86_64 release pipelines reference for this image.
image: rocky9-mofed24.10-cuda13:1
build:
context: .
network: host
dockerfile: rocky-release.Dockerfile
args:
# MLNX_OFED release that the Dockerfile downloads and installs.
MOFED_VERSION: 24.10-3.2.5.0
# Distro string in the MLNX_OFED tarball name; the Dockerfile also passes it
# to mlnxofedinstall --distro.
MOFED_OS: rhel9.6
# Selects the nvidia/cuda:<CUDA_VERSION>-devel-rockylinux9 base image.
OS_VERSION: 9
CUDA_VERSION: 13.0.0
# NOTE(review): rocky-release.Dockerfile declares no ARG NV_DRIVER_VERSION, so
# this value appears to be unused by the build - confirm.
NV_DRIVER_VERSION: 580
# Architecture string in the MLNX_OFED tarball name.
ARCH: x86_64
62 changes: 62 additions & 0 deletions buildlib/dockers/rocky-release.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Rocky Linux release-build image: an NVIDIA CUDA "devel" base image plus build
# tooling and a user-space-only MLNX_OFED installation.
#
# Build arguments (none has a default, so each one must be supplied with
# --build-arg or through the docker-compose "args" section):
#   CUDA_VERSION   CUDA version part of the base image tag
#   OS_VERSION     Rocky Linux version part of the base image tag
#   MOFED_VERSION  MLNX_OFED release to download and install
#   MOFED_OS       distro string in the MLNX_OFED tarball name; also passed to
#                  mlnxofedinstall --distro
#   ARCH           architecture string in the MLNX_OFED tarball name
ARG CUDA_VERSION
ARG OS_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-rockylinux${OS_VERSION}

# Declared after FROM so that they are visible to the instructions below.
ARG MOFED_VERSION
ARG MOFED_OS
ARG ARCH

# Build toolchain and utilities. On Rocky Linux 8/9 the yum command is backed
# by dnf, whose package cache lives in /var/cache/dnf, so both cache
# directories are removed to keep the layer small.
RUN yum install -y \
        autoconf \
        automake \
        environment-modules \
        ethtool \
        file \
        fuse-libs \
        gcc-c++ \
        git \
        glibc-devel \
        libtool \
        libusbx \
        lsof \
        make \
        maven \
        numactl-devel \
        pciutils \
        perl \
        pinentry \
        python3 \
        rdma-core-devel \
        rpm-build \
        tcl \
        tcsh \
        tk \
        valgrind-devel \
        wget \
    && yum clean all \
    && rm -rf /var/cache/yum /var/cache/dnf

# MOFED_DIR / MOFED_SITE_PLACE name the MLNX_OFED tarball and the directory it
# is downloaded from.
# The CUDA directories are prepended to whatever the base image already
# defines. The ${VAR:+:${VAR}} form emits the ":" separator only when VAR is
# set, so an unset variable no longer leaves a trailing ":" behind. An empty
# path element is interpreted as the current directory by the compiler (CPATH,
# LIBRARY_PATH) and by the dynamic loader (LD_LIBRARY_PATH).
ENV MOFED_DIR=MLNX_OFED_LINUX-${MOFED_VERSION}-${MOFED_OS}-${ARCH} \
    MOFED_SITE_PLACE=MLNX_OFED-${MOFED_VERSION} \
    CPATH=/usr/local/cuda/include${CPATH:+:${CPATH}} \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    LIBRARY_PATH=/usr/local/cuda/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}

# Download and install the user-space part of MLNX_OFED, then clean up.
#  - The tarball is fetched over HTTPS because the installer it contains is
#    executed as root during the build.
#  - MOFED sets memlock unlimited (required for RDMA runtime), but this breaks
#    su in unprivileged containers. Safe to remove for CI build containers,
#    hence the sed on /etc/security/limits.conf.
#  - The unversioned libudev.so / libz.so links are created with -f so the
#    step does not fail when a link of that name already exists.
RUN wget --no-verbose https://content.mellanox.com/ofed/${MOFED_SITE_PLACE}/${MOFED_DIR}.tgz \
    && tar -xzf ${MOFED_DIR}.tgz \
    && ${MOFED_DIR}/mlnxofedinstall --basic -q \
        --user-space-only \
        --without-fw-update \
        --skip-distro-check \
        --without-ucx \
        --without-hcoll \
        --without-openmpi \
        --without-sharp \
        --distro ${MOFED_OS} \
    && sed -i '/memlock/d' /etc/security/limits.conf \
    && rm -rf ${MOFED_DIR} *.tgz \
    && cd /usr/lib64 \
    && ln -sf libudev.so.1 libudev.so \
    && ln -sf libz.so.1 libz.so
4 changes: 4 additions & 0 deletions buildlib/pr/cuda/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ jobs:
CONTAINER: ubuntu22_cuda_13_0
# CUDA 13.0 matrix entries. Each entry sets the CONTAINER variable, which the
# job reads through container: $[ variables['CONTAINER'] ]. The values match
# the names of container resources declared in buildlib/pr/main.yml.
ubuntu24_cuda_13_0:
CONTAINER: ubuntu24_cuda_13_0
# Rocky Linux 8 with CUDA 13.0.
rocky8_cuda_13_0:
CONTAINER: rocky8_cuda_13_0
# Rocky Linux 9 with CUDA 13.0.
rocky9_cuda_13_0:
CONTAINER: rocky9_cuda_13_0

container: $[ variables['CONTAINER'] ]
timeoutInMinutes: 35
Expand Down
6 changes: 6 additions & 0 deletions buildlib/pr/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ resources:
# CUDA 13.0 PR-test containers. These use the upstream nvidia/cuda "devel"
# images directly and are started with $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU);
# both variables are defined outside this hunk.
- container: ubuntu24_cuda_13_0
image: nvidia/cuda:13.0.0-devel-ubuntu24.04
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU)
# Rocky Linux 8 variant; the name is what the rocky8_cuda_13_0 matrix entry in
# buildlib/pr/cuda/cuda.yml assigns to CONTAINER.
- container: rocky8_cuda_13_0
image: nvidia/cuda:13.0.0-devel-rockylinux8
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU)
# Rocky Linux 9 variant; the name is what the rocky9_cuda_13_0 matrix entry in
# buildlib/pr/cuda/cuda.yml assigns to CONTAINER.
- container: rocky9_cuda_13_0
image: nvidia/cuda:13.0.0-devel-rockylinux9
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU)
# CentOS 8 / CUDA 11 image from the internal registry; unlike the CUDA 13
# entries above it is also started with $(DOCKER_OPT_VOLUMES).
- container: centos8_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5-cuda11:1
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU)
Expand Down
Loading