rail-berkeley · j3soon · Nov 27, 2023 · Nov 28, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/README.md b/README.md
@@ -40,32 +40,95 @@ conda remove --name softlearning --all
 ## Docker Installation
 
 ### docker-compose
-To build the image and run the container:
-```
-export MJKEY="$(cat ~/.mujoco/mjkey.txt)" \
-    && docker-compose \
-        -f ./docker/docker-compose.dev.cpu.yml \
-        up \
-        -d \
-        --force-recreate
-```
+
+To build the image:
+
+- CPU:
+
+  ```sh
+  DOCKER_BUILDKIT=1 \
+    docker build \
+    -f ./docker/Dockerfile.softlearning.base.cpu \
+    -t softlearning:latest-cpu \
+    --progress=plain \
+    --secret id=mjkey,src="${HOME}/.mujoco/mjkey.txt" .
+  ```
+
+- GPU:
+
+  ```sh
+  DOCKER_BUILDKIT=1 \
+    docker build \
+    -f ./docker/Dockerfile.softlearning.base.gpu \
+    -t softlearning:latest-gpu \
+    --progress=plain \
+    --secret id=mjkey,src="${HOME}/.mujoco/mjkey.txt" .
+  ```
+
+To run the container:
+
+- CPU:
+
+  ```sh
+  docker-compose \
+    -p ${USER} \
+    -f ./docker/docker-compose.dev.cpu.yml \
+    up \
+    -d \
+    --force-recreate
+  ```
+
+- GPU:
+
+  ```sh
+  docker-compose \
+    -p ${USER} \
+    -f ./docker/docker-compose.dev.gpu.yml \
+    up \
+    -d \
+    --force-recreate
+  ```
 
 You can access the container with the typical Docker [exec](https://docs.docker.com/engine/reference/commandline/exec/)-command, i.e.
 
-```
-docker exec -it softlearning bash
-```
+- CPU:
+
+  ```sh
+  docker exec -it softlearning-dev-cpu bash
+  pip install -e .
+  ```
+
+- GPU:
+
+  ```sh
+  docker exec -it softlearning-dev-gpu bash
+  pip install -e .
+  # Make sure to add the `--trial-gpus` flag for the `softlearning` command.
+  ```
 
 See examples section for examples of how to train and simulate the agents.
 
 Finally, to clean up the docker setup:
-```
-docker-compose \
-    -f ./docker/docker-compose.dev.cpu.yml \
-    down \
-    --rmi all \
-    --volumes
-```
+
+- CPU:
+
+  ```sh
+  docker-compose \
+    -p ${USER} \
+    -f ./docker/docker-compose.dev.cpu.yml \
+    down \
+    --rmi all --volumes
+  ```
+
+- GPU:
+
+  ```sh
+  docker-compose \
+    -p ${USER} \
+    -f ./docker/docker-compose.dev.gpu.yml \
+    down \
+    --rmi all --volumes
+  ```
 
 ## Examples
 ### Training and simulating an agent

diff --git a/docker/Dockerfile.softlearning.base.cpu b/docker/Dockerfile.softlearning.base.cpu
@@ -51,8 +51,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     echo ". /opt/conda/etc/profile.d/conda.sh" >> /etc/bash.bashrc
 
 RUN apt-get install -y curl grep sed dpkg && \
-    TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
-    curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
+    TINI_VERSION=`curl -fsSL https://api.github.com/repos/krallin/tini/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'` && \
+    curl -fsSL "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini_${TINI_VERSION:1}.deb" > tini.deb && \
     dpkg -i tini.deb && \
     rm tini.deb && \
     apt-get clean \
@@ -133,10 +133,15 @@ COPY ./requirements.txt /tmp/
 RUN --mount=type=secret,id=mjkey,dst=/root/.mujoco/mjkey.txt \
     conda env update -f /tmp/environment.yml \
     && conda clean --all -y
+# Pin setuptools/wheel, then pip-install gym into the softlearning env. NOTE(review): pins presumably required for gym 0.18.0 to install - confirm.
+RUN source activate softlearning \
+    && pip install --no-cache-dir setuptools==65.5.0 wheel==0.38.0 && pip install --no-cache-dir gym==0.18.0
 
 RUN echo "conda activate softlearning" >> ~/.bashrc \
     && echo "cd ~/softlearning" >> ~/.bashrc
 
+RUN git config --global --add safe.directory /root/softlearning
+
 
 # =========== Container Entrypoint =============
 COPY ./docker/entrypoint.sh /entrypoint.sh

diff --git a/docker/Dockerfile.softlearning.base.gpu b/docker/Dockerfile.softlearning.base.gpu
@@ -25,18 +25,11 @@
 #   --force-recreate
 
 
-ARG UBUNTU_VERSION=18.04
-ARG ARCH=
-ARG CUDA=10.0
+# ARG UBUNTU_VERSION=20.04
 
-FROM nvidia/cudagl${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base
-# ARCH and CUDA are specified again because the FROM directive resets ARGs
-# (but their default value is retained if set previously)
+FROM nvcr.io/nvidia/tensorflow:21.05-tf2-py3
 
-ARG UBUNTU_VERSION
-ARG ARCH
-ARG CUDA
-ARG CUDNN=7.4.1.5-1
+# ARG UBUNTU_VERSION
 
 SHELL ["/bin/bash", "-c"]
 
@@ -58,8 +51,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     echo ". /opt/conda/etc/profile.d/conda.sh" >> /etc/bash.bashrc
 
 RUN apt-get install -y curl grep sed dpkg && \
-    TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
-    curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
+    TINI_VERSION=`curl -fsSL https://api.github.com/repos/krallin/tini/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'` && \
+    curl -fsSL "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini_${TINI_VERSION:1}.deb" > tini.deb && \
     dpkg -i tini.deb && \
     rm tini.deb && \
     apt-get clean \
@@ -70,38 +63,6 @@ RUN conda update -y --name base conda \
     && conda clean --all -y
 
 
-# ========== Tensorflow dependencies ==========
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-        build-essential \
-        cuda-command-line-tools-${CUDA/./-} \
-        cuda-cublas-${CUDA/./-} \
-        cuda-cufft-${CUDA/./-} \
-        cuda-curand-${CUDA/./-} \
-        cuda-cusolver-${CUDA/./-} \
-        cuda-cusparse-${CUDA/./-} \
-        curl \
-        libcudnn7=${CUDNN}+cuda${CUDA} \
-        libfreetype6-dev \
-        libhdf5-serial-dev \
-        libzmq3-dev \
-        pkg-config \
-        software-properties-common \
-        zip \
-        unzip \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-RUN [ ${ARCH} = ppc64le ] || (apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu${UBUNTU_VERSION/./}-5.0.2-ga-cuda${CUDA} \
-        && apt-get update \
-        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \
-        && apt-get clean \
-        && rm -rf /var/lib/apt/lists/*)
-
-# For CUDA profiling, TensorFlow requires CUPTI.
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
-
 # ========== Softlearning dependencies ==========
 RUN apt-get update -y \
     && apt-get install -y --no-install-recommends \
@@ -135,13 +96,6 @@ RUN apt-get update -y \
         xpra \
         xserver-xorg-dev \
         xvfb \
-    && export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" \
-    && echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" \
-            | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
-    && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
-            | apt-key add - \
-    && apt-get update -y \
-    && apt-get install -y google-cloud-sdk \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
@@ -163,19 +117,27 @@ ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/lib/nvidia-000
 
 
 # ========== Conda Environment ==========
-COPY ./environment.yml /tmp/environment.yml
-COPY ./requirements.txt /tmp/requirements.txt
+COPY ./environment.yml /tmp/
+COPY ./requirements.txt /tmp/
 
 # NOTE: Fetch `mjkey.txt` from secret mount to avoid writing it to the build
 # history. For details, see:
 # https://docs.docker.com/develop/develop-images/build_enhancements/#new-docker-build-secret-information
 RUN --mount=type=secret,id=mjkey,dst=/root/.mujoco/mjkey.txt \
     conda env update -f /tmp/environment.yml \
     && conda clean --all -y
+# Pin setuptools/wheel, then pip-install gym into the softlearning env. NOTE(review): pins presumably required for gym 0.18.0 to install - confirm.
+RUN source activate softlearning \
+    && pip install --no-cache-dir setuptools==65.5.0 wheel==0.38.0 && pip install --no-cache-dir gym==0.18.0
 
 RUN echo "conda activate softlearning" >> ~/.bashrc \
     && echo "cd ~/softlearning" >> ~/.bashrc
 
+RUN git config --global --add safe.directory /root/softlearning
+
+RUN ln /usr/local/cuda-11.3/targets/x86_64-linux/lib/libcusolver.so.11 /usr/local/cuda-11.3/targets/x86_64-linux/lib/libcusolver.so.10
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda-11.3/targets/x86_64-linux/lib/
+
 
 # =========== Container Entrypoint =============
 COPY ./docker/entrypoint.sh /entrypoint.sh

diff --git a/docker/docker-compose.dev.gpu.yml b/docker/docker-compose.dev.gpu.yml
@@ -29,3 +29,4 @@ services:
       - bash
     stdin_open: true
     tty: true
+    shm_size: '60gb'
diff --git a/requirements.txt b/requirements.txt
@@ -17,8 +17,6 @@ click==7.1.2
 cloudpickle==1.6.0
 colorama==0.4.4
 colorful==0.5.4
-conda==4.9.2
-conda-package-handling==1.7.2
 cryptography==3.3.2
 cycler==0.10.0
 Cython==0.29.21
@@ -42,7 +40,6 @@ googleapis-common-protos==1.52.0
 gpustat==0.6.0
 grpcio==1.32.0
 gtimer==1.0.0b5
-gym==0.18.0
 h5py==2.10.0
 hiredis==1.1.0
 idna==2.10
@@ -61,7 +58,7 @@ lxml==4.6.2
 Markdown==3.3.3
 matplotlib==3.3.3
 msgpack==1.0.2
-mujoco-py==2.0.2.13
+mujoco-py==2.0.2.10
 multidict==5.1.0
 networkx==2.5
 numpy==1.19.5