11
FROM databricksruntime/gpu-base:cuda11.8

# Build-time version pins used by the RUN steps below.
# Each can be overridden with `--build-arg <name>=<value>`.
ARG python_version="3.10"
ARG pip_version="22.3.1"
ARG setuptools_version="65.6.3"
ARG wheel_version="0.38.4"
ARG virtualenv_version="20.16.7"

WORKDIR /databricks
511
# Install python ${python_version} (3.10 by default) from ubuntu.
# Install pip via get-pip.py bootstrap script and install versions that match Anaconda distribution.
# curl runs with -f so an HTTP error fails the build instead of saving an error page as get-pip.py.
# The apt package lists are removed in the same layer so they do not persist in the image.
RUN apt-get update \
  && apt-get install -y \
       curl \
       software-properties-common \
       python${python_version} \
       python${python_version}-dev \
       python${python_version}-distutils \
  && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
  && /usr/bin/python${python_version} get-pip.py \
       pip==${pip_version} \
       setuptools==${setuptools_version} \
       wheel==${wheel_version} \
  && rm get-pip.py \
  && rm -rf /var/lib/apt/lists/*
1319
1420
@@ -17,16 +23,17 @@ RUN apt-get update \
# with user cleanup and may allow users to inadvertently update pip to newer versions
# incompatible with Databricks. Instead, we patch virtualenv to disable periodic updates per
# https://virtualenv.pypa.io/en/latest/user_guide.html#embed-wheels-for-distributions.
# Install the pinned virtualenv, flip PERIODIC_UPDATE_ON_BY_DEFAULT to False in its
# base_embed.py, then download the pinned pip wheel into the virtualenv_support directory.
# NOTE(review): presumably virtualenv_support is consulted when seeding with --no-download; confirm.
RUN /usr/local/bin/pip${python_version} install --no-cache-dir virtualenv==${virtualenv_version} \
  && sed -i -r 's/^(PERIODIC_UPDATE_ON_BY_DEFAULT) = True$/\1 = False/' \
       /usr/local/lib/python${python_version}/dist-packages/virtualenv/seed/embed/base_embed.py \
  && /usr/local/bin/pip${python_version} download pip==${pip_version} --dest \
       /usr/local/lib/python${python_version}/dist-packages/virtualenv_support/
2430
# Create /databricks/python3 environment.
# This is the default environment that Spark and notebooks will use.
# We install pip and wheel so their executables show up under /databricks/python3/bin.
# We use `--system-site-packages` so python will fallback to system site packages.
# We use `--no-download` so virtualenv will install the bundled pip and wheel.
# We use `--no-setuptools` so virtualenv does not seed setuptools into the environment.
RUN virtualenv \
      --python=python${python_version} \
      --system-site-packages \
      --no-download \
      --no-setuptools \
      /databricks/python3
3037
3138
# These python libraries are used by Databricks notebooks and the Python REPL
@@ -53,6 +60,12 @@ RUN /databricks/python3/bin/pip install \
# Specifies where Spark will look for the python binary
ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
5562
# Create a second environment at /databricks/python-lsp, built with the same virtualenv
# flags as /databricks/python3, and install the packages listed in
# python-lsp-requirements.txt into it.
# NOTE(review): presumably this isolates the Python language server's dependencies from
# the default environment; confirm.
RUN virtualenv \
      --python=python${python_version} \
      --system-site-packages \
      --no-download \
      --no-setuptools \
      /databricks/python-lsp

COPY python-lsp-requirements.txt /databricks/python-lsp-requirements.txt

RUN /databricks/python-lsp/bin/pip install -r /databricks/python-lsp-requirements.txt
68+
# Use pip cache purge to cleanup the cache safely
# NOTE(review): this runs in its own layer, so files cached by earlier RUN steps still
# persist in those earlier layers; the purge only keeps the cache out of the final filesystem.
RUN /databricks/python3/bin/pip cache purge
5871
0 commit comments