# Global build arguments.
# They are declared ahead of the first FROM, so a stage that needs one of them
# re-declares it by name (bare `ARG <name>`) to bring this default into scope.

# Interpreter command handed to the OSS compliance generator in the final stage.
ARG PYTHON=python3
# Full Python version handed to install_python.sh.
ARG PYTHON_VERSION=3.12.10
# major.minor form used in the /usr/local/lib and /usr/local/include python paths.
ARG PYTHON_SHORT_VERSION=3.12
# CUDA major/minor: joined as "<major>.<minor>" for install_cuda.sh and the
# /usr/local/cuda-<major>.<minor> directory.
ARG CUDA_MAJOR=12
ARG CUDA_MINOR=9
# EFA installer version handed to install_efa.sh.
ARG EFA_VERSION=1.43.1
# Shared toolchain stage: both python-builder and cuda-builder start FROM this image.
FROM nvidia/cuda:12.9.1-base-ubuntu22.04 AS base-builder

# Build-time only (an ARG is not persisted into the image environment):
# keeps debconf from waiting on interactive prompts while packages are configured.
ARG DEBIAN_FRONTEND=noninteractive

# One layer that:
#   1. moves /usr/local/cuda/compat up to /usr/local/compat
#      (NOTE(review): presumably so the compat libraries are not picked up from
#      the CUDA tree by default - confirm the intent with the image owners),
#   2. refreshes the apt index and upgrades systemd only,
#   3. installs the build toolchain and development headers (alphabetical order),
#   4. removes apt caches and index files in the same layer so they do not add to its size.
RUN mv /usr/local/cuda/compat /usr/local \
    && apt-get update \
    && apt-get -y upgrade --only-upgrade systemd \
    && apt-get install -y --allow-change-held-packages --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        check \
        cmake \
        curl \
        emacs \
        git \
        jq \
        libbz2-dev \
        libcurl4-openssl-dev \
        libegl1 \
        libffi-dev \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libhwloc-dev \
        libibverbs-dev \
        liblzma-dev \
        libnuma-dev \
        libnuma1 \
        libsm6 \
        libssl-dev \
        libsubunit-dev \
        libsubunit0 \
        libtool \
        libxext6 \
        libxrender-dev \
        openssl \
        pkg-config \
        python3-dev \
        unzip \
        vim \
        wget \
        zlib1g-dev \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
53+
54+ # #############################################################################
# Python build stage: runs install_python.sh on top of the shared toolchain image.
# The final stage copies its results out of /usr/local (lib, include, bin).
FROM base-builder AS python-builder

# Bring the global default for PYTHON_VERSION into this stage's scope.
ARG PYTHON_VERSION

COPY install_python.sh ./install_python.sh

# Run the installer with the requested version, then drop the script in the
# same layer so it does not remain in the stage filesystem.
RUN bash install_python.sh $PYTHON_VERSION \
    && rm install_python.sh
59+
60+ # #############################################################################
# CUDA build stage: runs install_cuda.sh on top of the shared toolchain image.
# The final stage copies /usr/local/cuda-<major>.<minor> out of this stage.
FROM base-builder AS cuda-builder

# Bring the global CUDA version defaults into this stage's scope.
ARG CUDA_MAJOR
ARG CUDA_MINOR

COPY install_cuda.sh ./install_cuda.sh

# The installer receives a single "<major>.<minor>" argument; the script is
# removed in the same layer once it has run.
RUN bash install_cuda.sh "$CUDA_MAJOR.$CUDA_MINOR" \
    && rm install_cuda.sh
66+
67+ # #############################################################################
# Final runtime image: starts from the plain CUDA base image and receives only
# the artifacts produced by the python-builder and cuda-builder stages.
FROM nvidia/cuda:12.9.1-base-ubuntu22.04 AS final

# Re-declare the global build arguments used in this stage.
ARG PYTHON
ARG PYTHON_SHORT_VERSION
ARG CUDA_MAJOR
ARG CUDA_MINOR
ARG EFA_VERSION

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# PYTHONDONTWRITEBYTECODE: Python won't try to write .pyc or .pyo files on the
#   import of source modules.
# PYTHONUNBUFFERED: force stdin, stdout and stderr to be totally unbuffered.
#   Good for logging.
# PATH / LD_LIBRARY_PATH: prepend the EFA, Open MPI, OFI-NCCL and CUDA locations
#   to whatever the base image already defines.
# NOTE(review): DEBIAN_FRONTEND is set with ENV, so it also persists into the
#   runtime environment. Left unchanged here because images derived from this
#   one may rely on it.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    DLC_CONTAINER_TYPE=base \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    CUDA_HOME="/usr/local/cuda" \
    PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" \
    LD_LIBRARY_PATH="/usr/local/lib:/usr/local/cuda/lib64:/opt/amazon/ofi-nccl/lib:/opt/amazon/efa/lib:/opt/amazon/openmpi/lib:${LD_LIBRARY_PATH}"

WORKDIR /

# + python and pip packages (awscli, boto3, requests)
COPY --from=python-builder /usr/local/lib/python${PYTHON_SHORT_VERSION} /usr/local/lib/python${PYTHON_SHORT_VERSION}
COPY --from=python-builder /usr/local/include/python${PYTHON_SHORT_VERSION} /usr/local/include/python${PYTHON_SHORT_VERSION}
COPY --from=python-builder /usr/local/bin /usr/local/bin
# + cuda-toolkit, cudnn, nccl
COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}

COPY install_efa.sh install_efa.sh
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh

# Single layer, in order:
#   1. make the telemetry helpers executable and source bash_telemetry.sh from
#      /etc/bash.bashrc,
#   2. run the EFA installer and remove the script,
#   3. upgrade OS packages and install the tools the OSS compliance step needs.
#      curl and ca-certificates are listed explicitly because curl is invoked
#      below; this avoids depending on the base image or install_efa.sh to
#      have provided it,
#   4. download (with --fail, so an HTTP error aborts the build instead of
#      saving an error page as the archive), unpack and run the OSS compliance
#      generator, and install the testOSSCompliance helper,
#   5. clean up the archive, temp files, apt index and /root/.cache.
# The last step is best-effort. It is wrapped in { ...; } on purpose: a bare
# `|| true` at the end of an && chain would also swallow failures from every
# earlier command in the chain.
RUN chmod +x /usr/local/bin/deep_learning_container.py && \
    chmod +x /usr/local/bin/bash_telemetry.sh && \
    echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \
    # Install EFA
    bash install_efa.sh ${EFA_VERSION} && \
    rm install_efa.sh && \
    # OSS compliance
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --allow-change-held-packages --no-install-recommends \
        ca-certificates \
        curl \
        unzip \
        wget && \
    apt-get clean && \
    HOME_DIR=/root && \
    curl --fail -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
    unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
    cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
    chmod +x /usr/local/bin/testOSSCompliance && \
    chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
    ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \
    rm -rf ${HOME_DIR}/oss_compliance* && \
    rm -rf /tmp/tmp* && \
    rm -rf /var/lib/apt/lists/* && \
    { rm -rf /root/.cache || true; }

# Default to an interactive shell (exec form, so bash runs as PID 1).
CMD ["/bin/bash"]
0 commit comments