diff --git a/src/jupyter-aou/.devcontainer.json b/src/jupyter-aou/.devcontainer.json
index fd5865d..2a55e1b 100644
--- a/src/jupyter-aou/.devcontainer.json
+++ b/src/jupyter-aou/.devcontainer.json
@@ -4,21 +4,10 @@
   "service": "app",
   "shutdownAction": "none",
   "workspaceFolder": "/workspace",
-  "postCreateCommand": [
-    "./startupscript/post-startup.sh",
-    "jupyter",
-    "/home/jupyter",
-    "${templateOption:cloud}",
-    "${templateOption:login}"
-  ],
+  "initializeCommand": "for d in /tmp/wb-mount/*; do umount -f \"$d\"; mountpoint -q \"$d\" || rm -rf \"$d\"; done",
+  "postCreateCommand": "./startup.sh && ./startupscript/post-startup.sh jupyter /home/jupyter \"${templateOption:cloud}\" \"${templateOption:login}\"",
   // re-mount bucket files on container start up
-  "postStartCommand": [
-    "./startupscript/remount-on-restart.sh",
-    "jupyter",
-    "/home/jupyter",
-    "${templateOption:cloud}",
-    "${templateOption:login}"
-  ],
+  "postStartCommand": "./startup.sh && ./startupscript/remount-on-restart.sh jupyter /home/jupyter \"${templateOption:cloud}\" \"${templateOption:login}\"",
   "remoteUser": "root",
   "customizations": {
     "workbench": {
diff --git a/src/jupyter-aou/build/Dockerfile b/src/jupyter-aou/build/Dockerfile
index 3a637f5..34c9693 100644
--- a/src/jupyter-aou/build/Dockerfile
+++ b/src/jupyter-aou/build/Dockerfile
@@ -1,5 +1,5 @@
 # Build the custom extension
-FROM us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-jupyter-extension-builder@sha256:8ebcd0253a7fcba6fc0a90dfaf1a190c59ae2f9d7ce3be156e30e7a5eddc9c86 AS extension-builder
+FROM us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-jupyter-extension-builder@sha256:70ba6449275a581059f2f851f44bb8e4a1e5d42e73ec512af362e9de0618cf2f AS extension-builder
 
 COPY extension /extension
 # Original extension is https://github.com/QuantStack/jupyterlab-snippets;
@@ -11,7 +11,7 @@ ADD https://github.com/darpan097/jupyterlab-snippets.git#a27c8429d2cfaf7aa9e4cad
 
 RUN /build.sh /extension /dist
 
-FROM us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-aou-jupyter@sha256:bd995ffb80b8b1b14f122b6ceb9e66c2037bbb9b182b4501c61b944cef4220a2
+FROM us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-aou-jupyter@sha256:6375f16454add4980acf83c75b70f6c7fce8668a674d2260a1ff0d20886ce60e
 
 COPY snippets $JUPYTER_USER_HOME_DIR/.local/share/jupyter/snippets
 
@@ -26,3 +26,8 @@ RUN jupyter labextension disable @jupyterlab/filebrowser-extension:download && \
 # Install all extensions we built
 RUN --mount=type=bind,from=extension-builder,source=/dist,target=/tmp/extensions \
     /tmp/extensions/install.sh
+
+USER root
+COPY remotefuse /opt/remotefuse
+RUN chmod +x /opt/remotefuse/*
+USER $JUPYTER_USER
diff --git a/src/jupyter-aou/build/remotefuse/fusermount b/src/jupyter-aou/build/remotefuse/fusermount
new file mode 100644
index 0000000..f3b5b83
--- /dev/null
+++ b/src/jupyter-aou/build/remotefuse/fusermount
@@ -0,0 +1,2 @@
+#!/bin/bash
+remotefuse fusermount "$@"
diff --git a/src/jupyter-aou/build/remotefuse/gcsfuse b/src/jupyter-aou/build/remotefuse/gcsfuse
new file mode 100644
index 0000000..ccb289d
--- /dev/null
+++ b/src/jupyter-aou/build/remotefuse/gcsfuse
@@ -0,0 +1,2 @@
+#!/bin/bash
+remotefuse gcsfuse "$@"
diff --git a/src/jupyter-aou/build/remotefuse/goofys b/src/jupyter-aou/build/remotefuse/goofys
new file mode 100644
index 0000000..e41f06f
--- /dev/null
+++ b/src/jupyter-aou/build/remotefuse/goofys
@@ -0,0 +1,2 @@
+#!/bin/bash
+remotefuse goofys "$@"
diff --git a/src/jupyter-aou/build/remotefuse/remotefuse b/src/jupyter-aou/build/remotefuse/remotefuse
new file mode 100644
index 0000000..5f30a5d
--- /dev/null
+++ b/src/jupyter-aou/build/remotefuse/remotefuse
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -o errexit
+
+# SSH passes arguments as a single string, so we will encode it as a JSON array.
+# First escape each argument, then join them into a JSON array.
+ARGS="$(for ARG in "$@"; do
+  printf "%s" "$ARG" | jq -Rs .
+done | jq -jsc .)"
+readonly ARGS
+
+LC_ALL=C.UTF-8 /usr/bin/ssh -i /home/jupyter/.ssh/remotefuse -T -o "StrictHostKeyChecking no" remotefuse@remotefuse "$ARGS"
diff --git a/src/jupyter-aou/docker-compose.yaml b/src/jupyter-aou/docker-compose.yaml
index 746b433..fcf45fe 100644
--- a/src/jupyter-aou/docker-compose.yaml
+++ b/src/jupyter-aou/docker-compose.yaml
@@ -10,18 +10,15 @@ services:
     restart: always
     volumes:
       - .:/workspace:cached
+      - /tmp/wb-mount:/home/jupyter/workspace:slave
+      - ssh-keys:/ssh-keys
     ports:
       - "8888:8888"
     networks:
       - app-network
-    cap_add:
-      - SYS_ADMIN
-    devices:
-      - /dev/fuse
-    security_opt:
-      - apparmor:unconfined
     depends_on:
       - wondershaper
+      - remotefuse
   wondershaper:
     container_name: "wondershaper"
     image: "us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-wondershaper@sha256:dd9df1811b9d15f4f8d95b6e515a2371e12d238240b8ef7359be77d961e79e3a"
@@ -29,6 +26,26 @@ services:
     network_mode: "host"
     cap_add:
       - NET_ADMIN
+  remotefuse:
+    container_name: "remotefuse"
+    build:
+      context: ./remotefuse
+      platforms:
+        - "linux/amd64"
+    restart: always
+    working_dir: /workspace
+    volumes:
+      - .:/workspace:cached
+      - ssh-keys:/ssh-keys
+      - /tmp/wb-mount:/home/remotefuse/workspace:shared
+    networks:
+      - app-network
+    cap_add:
+      - SYS_ADMIN
+    devices:
+      - /dev/fuse
 networks:
   app-network:
     external: true
+volumes:
+  ssh-keys:
diff --git a/src/jupyter-aou/remotefuse/Dockerfile b/src/jupyter-aou/remotefuse/Dockerfile
new file mode 100644
index 0000000..34c1b77
--- /dev/null
+++ b/src/jupyter-aou/remotefuse/Dockerfile
@@ -0,0 +1,55 @@
+FROM debian:latest
+
+RUN apt-get update --yes && \
+    apt-get install -yq --no-install-recommends \
+    jq \
+    openssh-server \
+    sudo \
+    curl \
+    lsb-release \
+    inotify-tools \
+    wget \
+    locales \
+    # gcloud CLI dependencies
+    apt-transport-https \
+    ca-certificates \
+    gnupg \
+    fuse \
+    # aws CLI dependencies
+    libc6 \
+    groff
+
+# Install gcloud CLI and gcsfuse
+RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list \
+  && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt gcsfuse-$(lsb_release -c -s) main" > /etc/apt/sources.list.d/gcsfuse.list \
+  && wget -qO- https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
+  && apt-get update -y \
+  && apt-get install -yq --no-install-recommends google-cloud-cli gcsfuse
+
+# Install aws CLI and goofys for s3 bucket mounting
+RUN apt-get update -y \
+  && apt-get install -yq --no-install-recommends awscli \
+  && wget "https://github.com/kahing/goofys/releases/latest/download/goofys" -O goofys \
+  && chmod +x goofys \
+  && mv goofys /usr/local/bin/
+
+COPY remotefuse /remotefuse
+RUN chmod +x /remotefuse
+COPY entrypoint.sh /entrypoint.sh
+
+ENV USER=remotefuse
+ENV UID=1000
+ENV USER_HOME_DIR=/home/remotefuse
+
+# Create a user with the shell set to /remotefuse. This prevents the user from
+# executing any other commands
+RUN useradd -l -m -d $USER_HOME_DIR \
+  -u $UID \
+  -g users \
+  -s /remotefuse $USER \
+  # Hide the motd and last login message
+  && touch $USER_HOME_DIR/.hushlogin \
+  # Uncomment user_allow_other in the fuse.conf to enable non-root user to mount files with -o allow-other option.
+  && sed -i '/user_allow_other/s/^#//g' /etc/fuse.conf
+
+ENTRYPOINT [ "/bin/bash", "/entrypoint.sh" ]
diff --git a/src/jupyter-aou/remotefuse/entrypoint.sh b/src/jupyter-aou/remotefuse/entrypoint.sh
new file mode 100644
index 0000000..5f9fcaf
--- /dev/null
+++ b/src/jupyter-aou/remotefuse/entrypoint.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+set -o errexit
+set -o nounset
+set -o xtrace
+
+cleanup() {
+  # Unmount everything first, then delete only the directories that are no
+  # longer mount points, so that a failed unmount can never turn into deleting
+  # the contents of a still-mounted bucket.
+  find /home/remotefuse/workspace/ -mindepth 1 -maxdepth 1 -type d -exec fusermount -u {} \;
+  find /home/remotefuse/workspace/ -mindepth 1 -maxdepth 1 -type d ! -exec mountpoint -q {} \; -exec rm -rf {} \;
+  exit
+}
+trap cleanup INT TERM
+
+process_key() {
+  if [ ! -f /ssh-keys/remotefuse.pub ]; then
+    return
+  fi
+
+  # Add the app service's public key to authorized_keys in restricted mode
+  (echo -n 'restrict '; cat /ssh-keys/remotefuse.pub) > "$SSH_DIR/authorized_keys"
+  # Immediately remove the public key from the volume, so that we won't
+  # try to reuse it. The main application container will generate a new
+  # one.
+  rm -f /ssh-keys/remotefuse.pub
+}
+
+watch_keys() {
+  # Wait for close_write rather than create, so that the key is only read once
+  # the app container has finished writing it.
+  inotifywait -m -e close_write -e moved_to /ssh-keys |
+  while read -r REPLY; do
+    process_key
+  done
+}
+
+readonly SSH_DIR="/home/remotefuse/.ssh"
+
+# SSH Key setup
+mkdir -p "$SSH_DIR"
+touch "$SSH_DIR/authorized_keys"
+chown -R remotefuse:users "$SSH_DIR"
+chmod 600 "$SSH_DIR/authorized_keys"
+
+process_key
+service ssh start
+
+# Keep the container running, but in the background so that interrupts can be
+# caught
+watch_keys &
+wait $!
diff --git a/src/jupyter-aou/remotefuse/remotefuse b/src/jupyter-aou/remotefuse/remotefuse
new file mode 100644
index 0000000..85ce6c6
--- /dev/null
+++ b/src/jupyter-aou/remotefuse/remotefuse
@@ -0,0 +1,57 @@
+#!/bin/bash
+set -o errexit
+
+if [ "$1" == "-c" ]; then
+  shift
+fi
+
+# SSH commands are passed as a single string, so we need to split it into an
+# array. Arguments are expected to be a JSON array
+readarray -t ESCAPED_ARGS < <(jq -c '.[]' <<< "$1")
+readonly ESCAPED_ARGS
+
+# ESCAPED_ARGS are escaped JSON strings, so we need to unescape them
+ORIG_ARGS=()
+for ARG in "${ESCAPED_ARGS[@]}"; do
+  ORIG_ARGS+=("$(jq -r . <<< "$ARG")")
+done
+
+readonly COMMAND="${ORIG_ARGS[0]}"
+case "$COMMAND" in
+  gcsfuse|goofys)
+    ;;
+  fusermount)
+    if [ "${ORIG_ARGS[1]}" != "-u" ]; then
+      echo "Error: remotefuse fusermount must be called with -u option." >&2
+      exit 1
+    fi
+    ;;
+  *)
+    echo "Usage: remotefuse {gcsfuse|goofys|fusermount -u} [args...]" >&2
+    exit 1
+    ;;
+esac
+
+ORIG_ARGS=("${ORIG_ARGS[@]:1}")
+readonly ORIG_ARGS
+
+readonly PATH_MATCHER="^.+\/workspace\/(.+)$"
+ARGS=()
+for ARG in "${ORIG_ARGS[@]}"; do
+  # Look for a workspace path in the arguments and replace it with one under
+  # /home/remotefuse/workspace, e.g. /home/jupyter/workspace/abc/def becomes
+  # /home/remotefuse/workspace/abc/def. Only the prefix up to the FIRST
+  # "/workspace/" is stripped, so nested directories named "workspace" survive.
+  #
+  # $PATH_MATCHER cannot be quoted, otherwise it will treat it as string
+  # matching.
+  if [[ "$ARG" =~ $PATH_MATCHER ]]; then
+    MOUNT_PATH="/home/remotefuse/workspace/${ARG#?*/workspace/}"
+    ARGS+=("$MOUNT_PATH")
+  else
+    ARGS+=("$ARG")
+  fi
+done
+readonly ARGS
+
+"${COMMAND}" "${ARGS[@]}"
diff --git a/src/jupyter-aou/startup.sh b/src/jupyter-aou/startup.sh
new file mode 100755
index 0000000..b90c9b8
--- /dev/null
+++ b/src/jupyter-aou/startup.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# This script is a custom startup script for jupyter aou. It is used to generate
+# an SSH key pair for the jupyter user to be used in the remotefuse sidecar.
+# /ssh-keys should be a volume mounted to both containers. It also sets
+# permissions on /home/jupyter/workspace to allow the jupyter user to read/write to
+# it.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+set -o xtrace
+
+readonly USER_NAME="jupyter"
+readonly RUN_AS_LOGIN_USER="sudo -u ${USER_NAME} bash -l -c"
+
+rm -rf "/home/${USER_NAME}/.ssh"
+${RUN_AS_LOGIN_USER} "mkdir -p '/home/${USER_NAME}/.ssh'"
+${RUN_AS_LOGIN_USER} "ssh-keygen -q -f '/home/${USER_NAME}/.ssh/remotefuse' -N ''"
+cp "/home/${USER_NAME}/.ssh/remotefuse.pub" /ssh-keys/remotefuse.pub
+
+# The remaining commands are expected to fail if this is not the first run
+set +o errexit
+
+# This will fail if any resources are already mounted, since the mounted
+# resources can't be chowned
+chown -R "${USER_NAME}:users" "/home/${USER_NAME}/workspace"
+
+# Modify the startup script so that /opt/remotefuse always takes priority over
+# /usr/bin
+sed -i 's/export PATH=\/usr\/bin:/export PATH=\/opt\/remotefuse:\/usr\/bin:/g' /workspace/startupscript/post-startup.sh