
Commit 84df82d

Upgrade to Tensorflow 2.6 (#182)
http://b/177304566
1 parent 45fffc8 commit 84df82d

4 files changed: +25 -28 lines


Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ RUN R -e 'reticulate::install_miniconda()'
 ENV RETICULATE_PYTHON=/root/.local/share/r-miniconda/envs/r-reticulate/bin/python

 # Tensorflow and Keras
-RUN R -e 'keras::install_keras(tensorflow = "2.3", extra_packages = c("pandas", "numpy", "pycryptodome"), method="conda")'
+RUN R -e 'keras::install_keras(tensorflow = "2.6", extra_packages = c("pandas", "numpy", "pycryptodome"), method="conda")'

 # Install kaggle libraries.
 # Do this at the end to avoid rebuilding everything when any change is made.
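
A quick sanity check of the new CPU pin, offered as a hedged sketch rather than part of this commit (it assumes the r-reticulate conda env configured above is the interpreter reticulate picks up):

# Confirm reticulate points at the conda env and TensorFlow reports a 2.6.x build.
library(tensorflow)
reticulate::py_config()
tensorflow::tf_version()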

gpu.Dockerfile

Lines changed: 17 additions & 23 deletions
@@ -1,22 +1,20 @@
 ARG BASE_TAG=staging
-FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS nvidia
+FROM nvidia/cuda:11.4.2-cudnn8-devel-ubuntu18.04 AS nvidia
 FROM gcr.io/kaggle-images/rstats:${BASE_TAG}
 ARG ncpus=1

 ADD clean-layer.sh /tmp/clean-layer.sh

 # Cuda support
 COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/
-COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/
 COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg

-ENV CUDA_MAJOR_VERSION=10
-ENV CUDA_MINOR_VERSION=2
-ENV CUDA_PATCH_VERSION=89
+ENV CUDA_MAJOR_VERSION=11
+ENV CUDA_MINOR_VERSION=4
+ENV CUDA_PATCH_VERSION=2
 ENV CUDA_VERSION=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.$CUDA_PATCH_VERSION
-ENV CUDA_PKG_VERSION=$CUDA_MAJOR_VERSION-$CUDA_MINOR_VERSION=$CUDA_VERSION-1
-ENV CUDNN_VERSION=7.6.5.32
-ENV CUBLAS_VERSION=10.2.2.89
+ENV CUDA_PKG_VERSION=$CUDA_MAJOR_VERSION-$CUDA_MINOR_VERSION
+ENV CUDNN_VERSION=8.2.4.15
 LABEL com.nvidia.volumes.needed="nvidia_driver"
 LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
 LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"
@@ -39,17 +37,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     cuda-nvml-dev-$CUDA_PKG_VERSION \
     cuda-minimal-build-$CUDA_PKG_VERSION \
     cuda-command-line-tools-$CUDA_PKG_VERSION \
-    libcudnn7=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
-    libcudnn7-dev=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
-    libcublas10=$CUBLAS_VERSION-1 \
-    libcublas-dev=$CUBLAS_VERSION-1 \
-    libnccl2=2.5.6-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
-    libnccl-dev=2.5.6-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
-    ln -s /usr/local/cuda-$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION /usr/local/cuda && \
-    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    # TODO: remove this hack when we move past tensorflow 2.3
-    # https://github.com/tensorflow/tensorflow/issues/38578#issuecomment-760175854
-    ln -sf /usr/local/cuda/lib64/libcudart.so.10.2 /usr/local/cuda/lib64/libcudart.so.10.1 && \
+    libcudnn8=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
+    libcudnn8-dev=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
+    libcublas-$CUDA_PKG_VERSION \
+    libcublas-dev-$CUDA_PKG_VERSION \
+    libnccl2=2.11.4-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \
+    libnccl-dev=2.11.4-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh

 ENV CUDA_HOME=/usr/local/cuda
@@ -62,7 +55,7 @@ ENV CUDA_HOME=/usr/local/cuda
 ADD ldpaths $R_HOME/etc/ldpaths

 # Install tensorflow with GPU support
-RUN R -e 'keras::install_keras(tensorflow = "2.3-gpu")' && \
+RUN R -e 'keras::install_keras(tensorflow = "2.6-gpu")' && \
     rm -rf /tmp/tensorflow_gpu && \
     /tmp/clean-layer.sh

@@ -77,8 +70,9 @@ RUN CPATH=/usr/local/cuda/targets/x86_64-linux/include install2.r --error --ncpu

 # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels
 # without internet (competitions for example). It will detect CUDA and install the proper version.
-# TODO(b/224540778) Unpin Torch.
-RUN R -e 'library(devtools); install_version("torch", version = "0.6.0", ask=FALSE)'
-RUN R -e 'library(torch); install_torch(reinstall = TRUE)'
+# Make Torch think we use CUDA 11.3 (https://github.com/mlverse/torch/issues/807)
+ENV CUDA=11.3
+RUN R -e 'install.packages("torch")'
+RUN R -e 'library(torch); install_torch()'

 CMD ["R"]
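
For the GPU image, a similar hedged sketch (not in the commit) can confirm that the runtime picks up CUDA; it assumes the container is started with GPU access (e.g. docker run --gpus all):

library(tensorflow)
# Should list at least one GPU device for the 2.6-gpu build.
tf$config$list_physical_devices("GPU")

library(torch)
# TRUE when install_torch() baked the CUDA 11.3 binaries into the image at build time.
cuda_is_available()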

package_installs.R

Lines changed: 3 additions & 4 deletions
@@ -52,10 +52,9 @@ install.packages("imager")

 # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels
 # without internet (competitions for example).
-# TODO(b/224540778) Unpin Torch.
-install_version("torch", version = "0.6.0", ask=FALSE)
+install.packages("torch")
 library(torch)
-install_torch(reinstall = TRUE)
+install_torch()

 # The R Keras package must be reinstalled after installing it in the python virtualenv.
-install_version("keras", version = "2.3.0.0", ask=FALSE)
+install_version("keras", version = "2.6.0.0", ask=FALSE)
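
With the torch pin dropped and keras bumped, a hedged check of the installed R packages (not part of this commit) could be:

# keras should now be in the 2.6 series to match the conda-installed TensorFlow;
# torch floats to whatever CRAN release was current at build time.
packageVersion("keras")
packageVersion("torch")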

test

Lines changed: 4 additions & 0 deletions
@@ -87,8 +87,12 @@ docker kill jupyter_test_r && docker rm jupyter_test_r
 docker run --rm --name=papermill_test_r --read-only --net=none \
     "$IMAGE_TAG" python -c 'import sys;import papermill as pm; print(pm.__version__)'

+
+# TF_FORCE_GPU_ALLOW_GROWTH is to prevent tensorflow from allocating the totality of a GPU memory.
+# https://stackoverflow.com/questions/34199233/how-to-prevent-tensorflow-from-allocating-the-totality-of-a-gpu-memory/55541385#55541385
 docker run --rm -t --net=none \
     -e HOME=/tmp \
+    -e TF_FORCE_GPU_ALLOW_GROWTH=true \
     -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \
     -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \
     -w=/working \
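
TF_FORCE_GPU_ALLOW_GROWTH only takes effect for processes that read it at startup; as a hedged alternative sketch (not part of this commit), the same behaviour can be requested from R through the tensorflow bindings:

library(tensorflow)
# Ask TensorFlow to grow GPU memory on demand instead of reserving all of it up front.
gpus <- tf$config$list_physical_devices("GPU")
for (gpu in gpus) tf$config$experimental$set_memory_growth(gpu, TRUE)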
