
Commit c302a4c

Merge pull request #194 from lu-wang-dl/update-gpu-container
Fix the GPU container issues
2 parents: 591594c + a8b0efc

File tree: 3 files changed (+62, −9 lines)

ubuntu/gpu/cuda-11.8/base/Dockerfile
ubuntu/gpu/cuda-11.8/venv/Dockerfile
ubuntu/gpu/cuda-11.8/venv/python-lsp-requirements.txt

ubuntu/gpu/cuda-11.8/base/Dockerfile

Lines changed: 7 additions & 2 deletions
@@ -8,12 +8,13 @@ RUN cd /etc/apt/sources.list.d && \
 # See https://github.com/databricks/containers/blob/master/ubuntu/minimal/Dockerfile
 RUN apt-get update && \
     apt-get install --yes \
-    openjdk-8-jdk-headless \
+    openjdk-8-jdk \
     iproute2 \
     bash \
     sudo \
     coreutils \
     procps \
+    acl \
     wget && \
     /var/lib/dpkg/info/ca-certificates-java.postinst configure && \
     apt-get clean && \
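
Both package changes here are easy to sanity-check after a local build; a minimal sketch, with gpu-base:dev as a hypothetical local tag:

docker build -t gpu-base:dev ubuntu/gpu/cuda-11.8/base
docker run --rm gpu-base:dev java -version    # full JDK now, from openjdk-8-jdk
docker run --rm gpu-base:dev getfacl /tmp     # getfacl ships with the newly added acl package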
@@ -35,4 +36,8 @@ RUN apt-get update \
     && add-apt-repository -r "deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu $(lsb_release -cs)-cran40/" \
     && apt-key del E298A3A825C0D65DFD57CBB651716619E084DAB9 \
     && apt-get clean \
-    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Add new user for cluster library installation
+RUN useradd libraries \
+    && usermod -L libraries
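
The libraries account is created for cluster library installation and locked with usermod -L, so it has no usable password. One way to verify, using the same hypothetical tag:

docker run --rm gpu-base:dev sh -c 'id libraries && passwd -S libraries'
# passwd -S should report the account status as "L" (locked)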

ubuntu/gpu/cuda-11.8/venv/Dockerfile

Lines changed: 20 additions & 7 deletions
@@ -1,14 +1,20 @@
 
 FROM databricksruntime/gpu-base:cuda11.8
 
+ARG python_version="3.10"
+ARG pip_version="22.3.1"
+ARG setuptools_version="65.6.3"
+ARG wheel_version="0.38.4"
+ARG virtualenv_version="20.16.7"
+
 WORKDIR /databricks
 
 # Install python 3.10 from ubuntu.
 # Install pip via get-pip.py bootstrap script and install versions that match Anaconda distribution.
 RUN apt-get update \
-  && apt-get install curl software-properties-common -y python3.10 python3.10-dev python3.10-distutils \
+  && apt-get install curl software-properties-common -y python${python_version} python${python_version}-dev python${python_version}-distutils \
   && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
-  && /usr/bin/python3.10 get-pip.py pip==22.3.1 setuptools==65.6.3 wheel==0.38.4 \
+  && /usr/bin/python${python_version} get-pip.py pip==${pip_version} setuptools==${setuptools_version} wheel==${wheel_version} \
   && rm get-pip.py
 
 
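The new ARGs make the pinned versions overridable at build time; a minimal sketch (the tag gpu-venv:dev is hypothetical, and any non-default python_version must actually be available in Ubuntu's repositories):

docker build \
  --build-arg python_version=3.10 \
  --build-arg pip_version=22.3.1 \
  -t gpu-venv:dev \
  ubuntu/gpu/cuda-11.8/venv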

@@ -17,16 +23,17 @@ RUN apt-get update \
 # with user cleanup and may allow users to inadvertently update pip to newer versions
 # incompatible with Databricks. Instead, we patch virtualenv to disable periodic updates per
 # https://virtualenv.pypa.io/en/latest/user_guide.html#embed-wheels-for-distributions.
-RUN /usr/local/bin/pip3.10 install --no-cache-dir virtualenv==20.16.7 \
-  && sed -i -r 's/^(PERIODIC_UPDATE_ON_BY_DEFAULT) = True$/\1 = False/' /usr/local/lib/python3.10/dist-packages/virtualenv/seed/embed/base_embed.py \
-  && /usr/local/bin/pip3.10 download pip==22.3.1 --dest \
-  /usr/local/lib/python3.10/dist-packages/virtualenv_support/
+RUN /usr/local/bin/pip${python_version} install --no-cache-dir virtualenv==${virtualenv_version} \
+  && sed -i -r 's/^(PERIODIC_UPDATE_ON_BY_DEFAULT) = True$/\1 = False/' /usr/local/lib/python${python_version}/dist-packages/virtualenv/seed/embed/base_embed.py \
+  && /usr/local/bin/pip${python_version} download pip==${pip_version} --dest \
+  /usr/local/lib/python${python_version}/dist-packages/virtualenv_support/
 
 # Create /databricks/python3 environment.
 # We install pip and wheel so their executables show up under /databricks/python3/bin.
 # We use `--system-site-packages` so python will fallback to system site packages.
 # We use `--no-download` so virtualenv will install the bundled pip and wheel.
-RUN virtualenv --python=/usr/bin/python3.10 /databricks/python3 --system-site-packages --no-download
+# Initialize the default environment that Spark and notebooks will use
+RUN virtualenv --python=python${python_version} --system-site-packages /databricks/python3 --no-download --no-setuptools
 
 
 # These python libraries are used by Databricks notebooks and the Python REPL
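
To confirm the periodic-update patch took effect and the bundled pip wheel is in place, one can inspect the image built above (paths assume the default python_version=3.10):

docker run --rm gpu-venv:dev grep PERIODIC_UPDATE_ON_BY_DEFAULT /usr/local/lib/python3.10/dist-packages/virtualenv/seed/embed/base_embed.py
docker run --rm gpu-venv:dev ls /usr/local/lib/python3.10/dist-packages/virtualenv_support/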
@@ -53,6 +60,12 @@ RUN /databricks/python3/bin/pip install \
 # Specifies where Spark will look for the python binary
 ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
 
+RUN virtualenv --python=python${python_version} --system-site-packages /databricks/python-lsp --no-download --no-setuptools
+
+COPY python-lsp-requirements.txt /databricks/.
+
+RUN /databricks/python-lsp/bin/pip install -r /databricks/python-lsp-requirements.txt
+
 # Use pip cache purge to cleanup the cache safely
 RUN /databricks/python3/bin/pip cache purge
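
The LSP tooling gets its own virtualenv, separate from /databricks/python3, so editor tooling cannot disturb notebook packages. A quick smoke test against the image built above:

docker run --rm gpu-venv:dev /databricks/python-lsp/bin/pylsp --help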

ubuntu/gpu/cuda-11.8/venv/python-lsp-requirements.txt

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+astroid==3.0.1
+attrs==23.1.0
+autopep8==2.0.4
+cattrs==23.2.3
+dill==0.3.7
+docstring-to-markdown==0.13
+exceptiongroup==1.1.3
+flake8==6.1.0
+importlib-metadata==6.8.0
+isort==5.12.0
+jedi==0.19.1
+lsprotocol==2023.0.0
+mccabe==0.7.0
+packaging==23.2
+parso==0.8.3
+platformdirs==4.0.0
+pluggy==1.3.0
+pycodestyle==2.11.1
+pydocstyle==6.3.0
+pyflakes==3.1.0
+pylint==3.0.2
+python-lsp-jsonrpc==1.1.2
+python-lsp-ruff==1.6.0
+python-lsp-server==1.10.0
+pytoolconfig==1.2.6
+rope==1.12.0
+ruff==0.1.6
+snowballstemmer==2.2.0
+tomli==2.0.1
+tomlkit==0.12.3
+typing_extensions==4.8.0
+ujson==5.8.0
+whatthepatch==1.0.5
+yapf==0.40.2
+zipp==3.17.0
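
Since every package is pinned, the set can be audited for conflicting pins inside the image built above:

docker run --rm gpu-venv:dev /databricks/python-lsp/bin/pip check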
