-
Notifications
You must be signed in to change notification settings - Fork 348
Expand file tree
/
Copy pathpackage_sources.Dockerfile
More file actions
231 lines (215 loc) · 13.2 KB
/
package_sources.Dockerfile
File metadata and controls
231 lines (215 loc) · 13.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# ============================================================================ #
# Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. #
# All rights reserved. #
# #
# This source code and the accompanying materials are made available under #
# the terms of the Apache License 2.0 which accompanies this distribution. #
# ============================================================================ #
#
# Builds an Ubuntu 24.04 image containing source code for a set of apt and pip
# packages plus the repo's tpls/ (third-party library) source. Tpls are cloned
# at build time using .gitmodules and a lock file (commit + path per line) via
# git clone --no-checkout --filter=tree:0 + fetch + checkout.
#
# Build from the repo root. The build context must contain package-source-diff/ and tpls_commits.lock (generate the lock file with scripts/generate_tpls_lock.sh if it does not exist yet).
# Trimmed lists are produced inline in the build_package_sources workflow.
# docker build -t package-sources:latest -f docker/build/package_sources.Dockerfile .
#
# base_image is the base image to use for the build.
#
# Expects in build context:
# package-source-diff/apt_packages_cudaq.txt - one apt package name per line (cudaq)
# package-source-diff/pip_packages_cudaq.txt - one pip package==version per line (cudaq)
# package-source-diff/apt_packages_cudaqx.txt - one apt package name per line (cudaqx)
# package-source-diff/pip_packages_cudaqx.txt - one pip package==version per line (cudaqx)
# package-source-diff/apt_packages_cudaqx_trimmed.txt - cudaqx apt not in cudaq
# package-source-diff/pip_packages_cudaqx_trimmed.txt - cudaqx pip not in cudaq
# package-source-diff/pip_packages_macos.txt - one pip package==version per line (macos)
# package-source-diff/pip_packages_macos_trimmed.txt - macos pip not in cudaq or cudaqx
# tpls_commits.lock - "<commit> <path>" per submodule (same as install_prerequisites.sh -l)
# .gitmodules - submodule paths and URLs
# scripts/clone_tpls_from_lock.sh - clone script
# NOTICE, LICENSE - attribution
# prereqs/ - prerequisite sources, copied as-is into the image's prereqs/ folder
# NOTICE_PIP_cudaq_cu<N>, NOTICE_PIP_cudaqx_cu<N>, NOTICE_PIP_macos_cu<N> - pip attribution per variant (from generate_pip_attribution.py)
# NOTICE_APT_cudaq_cu<N>, NOTICE_APT_cudaqx_cu<N> - apt attribution per variant (from generate_apt_attribution.py)
# Base image for the build; defaults to Ubuntu 24.04 and can be overridden with --build-arg base_image=<image>.
ARG base_image=ubuntu:24.04
FROM ${base_image}
# Run every RUN instruction under bash: later steps rely on bash-only syntax
# (process substitution, `set -o pipefail`, `echo -e`).
SHELL ["/bin/bash", "-c"]
# Declared as ARG rather than ENV, so it applies to the build only and is not
# persisted into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
# Install the tools used to fetch apt sources, pip sdists, and to clone tpls:
# dpkg-dev (needed by `apt-get source` to unpack), curl + jq + unearth (locate and
# download sdists), git (tpls clones), unzip (.zip sdists and wheels).
# Packages are listed alphabetically to keep diffs small.
# pip runs with --no-cache-dir so its download cache is not baked into the layer;
# --break-system-packages is required because the system Python is externally managed.
# NOTE(review): unearth is unpinned, so rebuilds may pick up a newer release - consider pinning.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        dpkg-dev \
        git \
        jq \
        python3 \
        python3-pip \
        unzip \
    && python3 -m pip install --no-cache-dir --upgrade --break-system-packages unearth \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
# Register the Mellanox OFED apt repository (binary entry from the vendor's .list file plus
# a deb-src entry appended here) so that librdmac1 can be resolved.
#
# - The signing key is stored as an ASCII-armored file in /etc/apt/trusted.gpg.d, which is the
#   replacement apt-key(8) recommends for the deprecated `apt-key add`.
# - The repository list is downloaded with TLS certificate verification enabled
#   (ca-certificates is installed by the previous step); a tampered .list file would let an
#   attacker redirect apt to an arbitrary repository.
# - The single quotes keep $(ARCH) literal instead of letting bash run it as a command
#   substitution; the vendor's own list uses the same placeholder, presumably expanded by apt.
# - The apt lists are left in place on purpose: later steps need them for `apt-get source`.
RUN apt-get update && apt-get install -y --no-install-recommends gnupg wget \
    && wget -qO /etc/apt/trusted.gpg.d/mellanox.asc "https://www.mellanox.com/downloads/ofed/RPM-GPG-KEY-Mellanox" \
    && mkdir -p /etc/apt/sources.list.d \
    && wget -q -nc -P /etc/apt/sources.list.d "https://linux.mellanox.com/public/repo/mlnx_ofed/5.3-1.0.0.1/ubuntu20.04/mellanox_mlnx_ofed.list" \
    && echo 'deb-src http://linux.mellanox.com/public/repo/mlnx_ofed/5.3-1.0.0.1/ubuntu20.04/$(ARCH) ./' >> /etc/apt/sources.list.d/mellanox_mlnx_ofed.list \
    && apt-get update -y
# Turn on deb-src entries so `apt-get source` can resolve packages. Ubuntu 24.04 ships its
# sources in DEB822 format (ubuntu.sources); when that file is absent, fall back to
# uncommenting the deb-src lines of the classic sources.list (best effort).
RUN deb822_sources=/etc/apt/sources.list.d/ubuntu.sources; \
    if [ ! -f "${deb822_sources}" ]; then \
        sed -i '/^# deb-src/s/^# //' /etc/apt/sources.list 2>/dev/null || true; \
    else \
        sed -i 's/^Types: deb$/Types: deb deb-src/' "${deb822_sources}"; \
    fi
# Refresh the package index so the newly enabled source entries are picked up.
RUN apt-get update
# Root of the source tree assembled by this image.
ENV SOURCES_ROOT=/sources
# Create the output layout: attribution, per-variant apt/pip folders, tpls, and the
# scripts folder (bash brace expansion yields the same eight directories as spelling them out).
RUN mkdir -p \
        "${SOURCES_ROOT}"/{NOTICES,tpls,.scripts} \
        "${SOURCES_ROOT}"/{cudaq,cudaqx}/{apt,pip} \
        "${SOURCES_ROOT}"/macos/pip
# Folder holding the build inputs (package lists, lock file, clone script).
ENV SCRIPTS_DIR=${SOURCES_ROOT}/.scripts
# Stage the build inputs in the scripts folder: submodule map, tpls lock file, clone script,
# and the apt/pip package lists. The prereqs/ sources go straight into the source tree.
COPY .gitmodules "${SCRIPTS_DIR}/.gitmodules"
COPY tpls_commits.lock "${SCRIPTS_DIR}/tpls_commits.lock"
COPY scripts/clone_tpls_from_lock.sh "${SCRIPTS_DIR}/clone_tpls_from_lock.sh"
COPY package-source-diff/apt_packages_cudaq.txt \
     package-source-diff/apt_packages_cudaqx.txt \
     package-source-diff/apt_packages_cudaqx_trimmed.txt \
     "${SCRIPTS_DIR}/"
COPY package-source-diff/pip_packages_cudaq.txt \
     package-source-diff/pip_packages_cudaqx.txt \
     package-source-diff/pip_packages_cudaqx_trimmed.txt \
     package-source-diff/pip_packages_macos.txt \
     package-source-diff/pip_packages_macos_trimmed.txt \
     "${SCRIPTS_DIR}/"
COPY prereqs/ "${SOURCES_ROOT}/prereqs/"
# Attribution files live in NOTICES/. The NOTICE_PIP_* and NOTICE_APT_* files are generated
# per variant and carry the CUDA version in their name (e.g. NOTICE_PIP_cudaq_cu12).
COPY NOTICE LICENSE "${SOURCES_ROOT}/NOTICES/"
COPY NOTICE_PIP_cudaq_cu* \
     NOTICE_PIP_cudaqx_cu* \
     NOTICE_PIP_macos_cu* \
     "${SOURCES_ROOT}/NOTICES/"
COPY NOTICE_APT_cudaq_cu* \
     NOTICE_APT_cudaqx_cu* \
     "${SOURCES_ROOT}/NOTICES/"
# Fetch apt sources, pip sdists, and clone tpls in parallel. Each job's output is prefixed
# with its name so the interleaved build log stays readable.
#
# Ordering: `apt-get update` runs to completion BEFORE any job is started. Writing
# `apt-get update && ... && ( job ) | sed ... &` would background the whole and-or list,
# letting the index refresh race with the other jobs' `apt-get source` calls.
#
# Failure handling: a package whose source cannot be fetched is best effort - it is recorded
# in the variant's *_omitted_packages.txt and the job carries on. A job that fails outright
# (cannot enter its directory, missing list, clone script exits non-zero) fails the build:
# every job is waited on by PID, and pipefail makes the job status reflect the fetcher
# rather than the `sed` that prefixes its output.
RUN set -o pipefail; \
    apt-get update || exit 1; \
    # fetch_apt_sources <dest-dir> <package-list>
    # Runs `apt-get source` for every non-empty line of <package-list> inside <dest-dir>.
    # NOTE(review): chmod 777 is presumably there so apt's unprivileged download user can
    # write into the directory - confirm before tightening.
    fetch_apt_sources() { \
        local dest="$1" list="$2" pkg; \
        cd "$dest" && chmod 777 . && : > "$dest/apt_omitted_packages.txt" || return 1; \
        [ -f "$list" ] || { echo "missing package list: $list"; return 1; }; \
        while IFS= read -r pkg || [ -n "$pkg" ]; do \
            [ -z "$pkg" ] && continue; \
            apt-get source -y "$pkg" || echo "$pkg" >> "$dest/apt_omitted_packages.txt"; \
        done < "$list"; \
    }; \
    # fetch_pip_sdists <dest-dir> <package-list> <omitted-file-name>
    # Resolves each `package==version` line to an sdist URL with unearth and downloads it
    # into <dest-dir>; packages without an sdist (or whose download fails) are recorded.
    fetch_pip_sdists() { \
        local dest="$1" list="$2" omitted="$3" package url; \
        : > "$dest/$omitted" && cd "$dest" || return 1; \
        [ -f "$list" ] || { echo "missing package list: $list"; return 1; }; \
        while IFS= read -r package || [ -n "$package" ]; do \
            [ -z "$package" ] && continue; \
            url=$(unearth --no-binary "$package" 2>/dev/null | jq -r '.link.url'); \
            if [ -n "$url" ] && [ "$url" != "null" ]; then \
                curl -fsSL -O "$url" || echo "$package" >> "$omitted"; \
            else \
                echo "$package" >> "$omitted"; \
            fi; \
        done < "$list"; \
    }; \
    pids=(); \
    ( fetch_apt_sources "${SOURCES_ROOT}/cudaq/apt" "${SCRIPTS_DIR}/apt_packages_cudaq.txt" ) 2>&1 | sed 's/^/[cudaq-apt] /' & pids+=("$!"); \
    ( fetch_apt_sources "${SOURCES_ROOT}/cudaqx/apt" "${SCRIPTS_DIR}/apt_packages_cudaqx_trimmed.txt" ) 2>&1 | sed 's/^/[cudaqx-apt] /' & pids+=("$!"); \
    ( fetch_pip_sdists "${SOURCES_ROOT}/cudaq/pip" "${SCRIPTS_DIR}/pip_packages_cudaq.txt" pip_omitted_packages.txt ) 2>&1 | sed 's/^/[cudaq-pip] /' & pids+=("$!"); \
    ( fetch_pip_sdists "${SOURCES_ROOT}/cudaqx/pip" "${SCRIPTS_DIR}/pip_packages_cudaqx_trimmed.txt" pip_omitted_packages.txt ) 2>&1 | sed 's/^/[cudaqx-pip] /' & pids+=("$!"); \
    ( fetch_pip_sdists "${SOURCES_ROOT}/macos/pip" "${SCRIPTS_DIR}/pip_packages_macos_trimmed.txt" macos_pip_omitted_packages.txt ) 2>&1 | sed 's/^/[macos-pip] /' & pids+=("$!"); \
    ( SOURCES_ROOT="${SOURCES_ROOT}" GITMODULES="${SCRIPTS_DIR}"/.gitmodules lock_file="${SCRIPTS_DIR}"/tpls_commits.lock \
        bash "${SCRIPTS_DIR}"/clone_tpls_from_lock.sh ) 2>&1 | sed 's/^/[tpls] /' & pids+=("$!"); \
    # A bare `wait` always returns 0; wait on each job so that a failure is not lost.
    status=0; \
    for pid in "${pids[@]}"; do \
        wait "$pid" || status=1; \
    done; \
    exit "$status"
# Unpack the downloaded pip sdists in place and delete each archive once it has been
# extracted. Gzipped tarballs are handled first, then zip archives; an unmatched glob
# stays literal and is skipped by the regular-file check.
RUN for sdist_dir in "${SOURCES_ROOT}/cudaq/pip" "${SOURCES_ROOT}/cudaqx/pip" "${SOURCES_ROOT}/macos/pip"; do \
        cd "$sdist_dir" && \
        for archive in *.tar.gz *.tgz; do \
            if [ -f "$archive" ]; then \
                tar -xzf "$archive" && rm -f "$archive"; \
            fi; \
        done; \
        for archive in *.zip; do \
            if [ -f "$archive" ]; then \
                unzip -q -o "$archive" && rm -f "$archive"; \
            fi; \
        done; \
    done
# Print every omitted-package list to the build log, one RUN per list, header line first.
# $(< file) is bash's built-in equivalent of $(cat file).
RUN echo -e "CUDAQ apt omitted packages:\n$(< "${SOURCES_ROOT}/cudaq/apt/apt_omitted_packages.txt")"
RUN echo -e "CUDAQX apt omitted packages:\n$(< "${SOURCES_ROOT}/cudaqx/apt/apt_omitted_packages.txt")"
RUN echo -e "CUDAQ pip omitted packages:\n$(< "${SOURCES_ROOT}/cudaq/pip/pip_omitted_packages.txt")"
RUN echo -e "CUDAQX pip omitted packages:\n$(< "${SOURCES_ROOT}/cudaqx/pip/pip_omitted_packages.txt")"
RUN echo -e "MACOS pip omitted packages:\n$(< "${SOURCES_ROOT}/macos/pip/macos_pip_omitted_packages.txt")"
# For apt packages whose source could not be fetched, keep at least their license material:
# download the binary .deb, unpack it, and copy everything named *license*, *eula* or
# *copyright* into omitted_licenses/apt/<package>. Every step is best effort - a package that
# cannot be downloaded is skipped, and unpack/copy errors are ignored.
RUN echo "Retrieving EULA/copyright for omitted apt packages..." && \
    mkdir -p "${SOURCES_ROOT}/omitted_licenses/apt/" /tmp/deb_extract && \
    while IFS= read -r pkg || [ -n "$pkg" ]; do \
        if [ -z "$pkg" ]; then continue; fi; \
        ( cd /tmp/deb_extract && apt-get download "$pkg" 2>/dev/null ) || true; \
        deb_file=$(ls /tmp/deb_extract/*.deb 2>/dev/null | head -1); \
        [ -n "$deb_file" ] || continue; \
        unpack_dir="/tmp/deb_extract/${pkg}_pkg"; \
        license_dir="${SOURCES_ROOT}/omitted_licenses/apt/${pkg}"; \
        dpkg-deb -R "$deb_file" "$unpack_dir" 2>/dev/null || true; \
        mkdir -p "$license_dir"; \
        find "$unpack_dir" \( -iname "*license*" -o -iname "*eula*" -o -iname "*copyright*" \) -exec cp -a {} "$license_dir/" \; 2>/dev/null || true; \
        rm -rf "$unpack_dir" /tmp/deb_extract/*.deb; \
    done < <(for omitted_list in "${SOURCES_ROOT}/cudaq/apt/apt_omitted_packages.txt" "${SOURCES_ROOT}/cudaqx/apt/apt_omitted_packages.txt"; do if [ -f "$omitted_list" ]; then cat "$omitted_list"; fi; done); \
    rm -rf /tmp/deb_extract
# For pip packages without an sdist, keep at least their license material in
# omitted_licenses/pip/<name>:
#   1. Fetch the release metadata from the PyPI JSON API once per package.
#   2. Pick a wheel - a manylinux x86_64 build when one exists, otherwise the first wheel of
#      any platform (macOS-only, pure-Python, other architectures) - unpack it and copy every
#      file named *license*, *eula* or *copyright* (cp -n keeps the first file per name).
#   3. If that produced nothing (no wheel, failed download, or no such files inside), fall
#      back to the license / license_expression field of the metadata.
# Lines that are not of the form name==version are skipped. Everything is best effort:
# network and unpack errors never fail the build.
RUN echo "Retrieving EULA/license for omitted pip packages..." && \
    mkdir -p "${SOURCES_ROOT}/omitted_licenses/pip" /tmp/wheel_extract && \
    while IFS= read -r package || [ -n "$package" ]; do \
        [ -z "$package" ] && continue; \
        name="${package%%==*}"; \
        version="${package#*==}"; \
        if [ -z "$name" ] || [ -z "$version" ] || [ "$version" = "$package" ]; then continue; fi; \
        meta=$(curl -fsS "https://pypi.org/pypi/${name}/${version}/json" 2>/dev/null) || meta=""; \
        [ -n "$meta" ] || continue; \
        # Preferred wheels are listed first, followed by all wheels, so .[0] is the best match.
        url=$(jq -r '[.urls[]? | select(.packagetype=="bdist_wheel")] | (map(select(.filename | test("manylinux.*x86_64"))) + .) | .[0].url // empty' <<< "$meta" 2>/dev/null); \
        dest="${SOURCES_ROOT}/omitted_licenses/pip/${name}"; \
        if [ -n "$url" ] && curl -fsSL -o /tmp/pip_wheel.whl "$url" 2>/dev/null; then \
            mkdir -p "$dest"; \
            (cd /tmp/wheel_extract && unzip -o -q /tmp/pip_wheel.whl 2>/dev/null) || true; \
            find /tmp/wheel_extract -type f \( -iname "*license*" -o -iname "*eula*" -o -iname "*copyright*" \) -exec cp -an {} "$dest/" \; 2>/dev/null || true; \
            find /tmp/wheel_extract -mindepth 1 -delete 2>/dev/null || rm -rf /tmp/wheel_extract/*; \
        fi; \
        rm -f /tmp/pip_wheel.whl; \
        if [ -z "$(ls -A "$dest" 2>/dev/null)" ]; then \
            # Empty strings are filtered out so an empty `license` does not hide `license_expression`.
            license_text=$(jq -r '[.info.license, .info.license_expression] | map(select(. != null and . != "")) | .[0] // empty' <<< "$meta" 2>/dev/null); \
            if [ -n "$license_text" ] && [ "$license_text" != "null" ]; then \
                mkdir -p "$dest"; \
                echo "$license_text" > "$dest/LICENSE_from_PyPI.txt"; \
            fi; \
        fi; \
    done < <(for f in "${SOURCES_ROOT}/cudaq/pip/pip_omitted_packages.txt" "${SOURCES_ROOT}/cudaqx/pip/pip_omitted_packages.txt" "${SOURCES_ROOT}/macos/pip/macos_pip_omitted_packages.txt"; do [ -f "$f" ] && cat "$f"; done); \
    rm -rf /tmp/wheel_extract
# Summary: number of top-level source trees collected per variant.
# Sources live under <variant>/apt and <variant>/pip (not directly under SOURCES_ROOT), and
# the pip archives have already been extracted and deleted by this point, so directories are
# counted rather than archives. -mindepth 1 keeps the searched directory itself out of the count.
RUN for variant in cudaq cudaqx; do \
        echo "${variant} apt: $(find "${SOURCES_ROOT}/${variant}/apt" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l) dirs"; \
    done; \
    for variant in cudaq cudaqx macos; do \
        echo "${variant} pip: $(find "${SOURCES_ROOT}/${variant}/pip" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l) extracted sdists"; \
    done; \
    echo "tpls: $(find "${SOURCES_ROOT}/tpls" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l) libraries"
# Containers started from the image land in the source tree.
WORKDIR ${SOURCES_ROOT}