From 61629abe357d47585932983c357913385473f045 Mon Sep 17 00:00:00 2001 From: telliere Date: Tue, 19 Mar 2024 17:00:46 +0200 Subject: [PATCH 01/18] changing debian as base image for alpine and bumping apptainer for newer version --- client/container_preparation/Dockerfile | 15 ++++++++------- client/container_preparation/entrypoint.sh | 2 +- .../tools/docker/build_env/Dockerfile | 6 +++--- client/data_preparation/Dockerfile | 16 +++++++++------- client/data_preparation/entrypoint.sh | 2 +- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/client/container_preparation/Dockerfile b/client/container_preparation/Dockerfile index 48cf9d3..6f4ccf5 100644 --- a/client/container_preparation/Dockerfile +++ b/client/container_preparation/Dockerfile @@ -1,15 +1,16 @@ # Using Python original Docker image -FROM python:3.9-bullseye - -# Update & Upgrade global packages base -RUN apt update -y \ - && apt upgrade -y +FROM --platform=linux/amd64 python:3.9-alpine # Install necessary packages -RUN apt install -y \ +RUN apk add \ git \ curl \ - jq + jq \ + build-base \ + libffi-dev + +RUN curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup.sh -y +ENV PATH="$PATH:/root/.cargo/bin" # Install spire-agent RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz diff --git a/client/container_preparation/entrypoint.sh b/client/container_preparation/entrypoint.sh index c34122e..56d4c74 100755 --- a/client/container_preparation/entrypoint.sh +++ b/client/container_preparation/entrypoint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # ## This entrypoint wraps up the container preparation with the agent spawning and the key shipping. # diff --git a/client/container_preparation/tools/docker/build_env/Dockerfile b/client/container_preparation/tools/docker/build_env/Dockerfile index d3b04fb..e7fae83 100644 --- a/client/container_preparation/tools/docker/build_env/Dockerfile +++ b/client/container_preparation/tools/docker/build_env/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM --platform=linux/amd64 ubuntu:22.04 # Update & Upgrade global packages base RUN apt update -y \ @@ -17,8 +17,8 @@ RUN apt install \ RUN DEBIAN_FRONTEND=noninteractive apt install -y cryptsetup # Get apptainer installer and install it -RUN wget -q https://github.com/apptainer/apptainer/releases/download/v1.2.5/apptainer_1.2.5_amd64.deb -RUN apt install -y ./apptainer_1.2.5_amd64.deb +RUN wget -q https://github.com/apptainer/apptainer/releases/download/v1.3.0/apptainer_1.3.0_amd64.deb +RUN apt install -y ./apptainer_1.3.0_amd64.deb # Create a directory to eventually map for output image RUN mkdir /output diff --git a/client/data_preparation/Dockerfile b/client/data_preparation/Dockerfile index b57b4bd..205e435 100644 --- a/client/data_preparation/Dockerfile +++ b/client/data_preparation/Dockerfile @@ -1,15 +1,17 @@ # Using Python original Docker image -FROM python:3.9-bullseye - -# Update & Upgrade global packages base -RUN apt update -y \ - && apt upgrade -y +FROM --platform=linux/amd64 python:3.9-alpine # Install necessary packages -RUN apt install -y \ +RUN apk add \ git \ curl \ - jq + jq \ + build-base \ + libffi-dev + +# Install Rust +RUN curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup.sh -y +ENV PATH="$PATH:/root/.cargo/bin" # Install spire-agent RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz diff --git a/client/data_preparation/entrypoint.sh b/client/data_preparation/entrypoint.sh index 5a75b73..1627cee 100755 --- a/client/data_preparation/entrypoint.sh +++ b/client/data_preparation/entrypoint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # ## This entrypoint wraps up the Data preparation with the agent spawning and the key shipping. # From 7a96c063eb42fdfcb542169b7b4e2775f5e019fe Mon Sep 17 00:00:00 2001 From: telliere Date: Tue, 19 Mar 2024 17:01:08 +0200 Subject: [PATCH 02/18] editing docker utils to use only one docker client --- client/container_preparation/lib/image_build.py | 6 +++--- client/container_preparation/tools/docker/docker_utils.py | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/client/container_preparation/lib/image_build.py b/client/container_preparation/lib/image_build.py index 0a65c07..d38eb85 100644 --- a/client/container_preparation/lib/image_build.py +++ b/client/container_preparation/lib/image_build.py @@ -112,11 +112,11 @@ def create_sif_image( encrypted (bool, optional): Wether or not to encrypt the container, keys are generated relatively to the current path, and are called "keys, keys.pub". Defaults to False. """ # Check that the build environment exists - build_env_exists = check_build_env_exists() + build_env_exists = check_build_env_exists(docker_client=docker_client) if not build_env_exists: print("Build environment container image doesn't exist, building it") - build_build_env() - build_env_exists = check_build_env_exists() + build_build_env(docker_client=docker_client) + build_env_exists = check_build_env_exists(docker_client=docker_client) # Fixing the build environment image tag build_env_image_tag = build_env_exists.attrs["RepoTags"][0] diff --git a/client/container_preparation/tools/docker/docker_utils.py b/client/container_preparation/tools/docker/docker_utils.py index 719b471..9b08783 100644 --- a/client/container_preparation/tools/docker/docker_utils.py +++ b/client/container_preparation/tools/docker/docker_utils.py @@ -1,23 +1,21 @@ import docker, os -def check_build_env_exists(): +def check_build_env_exists(docker_client : docker.DockerClient): """Verify that the build environment (docker image sd-container/build_env) exists. Returns: bool: Wether or not the sd-container/build_env image exists. """ # Check the build env exists - docker_client = docker.from_env() try: return docker_client.images.get("sd-container/build_env") except docker.errors.ImageNotFound: return False -def build_build_env(): +def build_build_env(docker_client : docker.DockerClient): """Builds the build environment""" - docker_client = docker.from_env() docker_client.images.build( path=f"{os.path.realpath(os.path.dirname(__file__))}/build_env", dockerfile="./Dockerfile", From 80716eecbd8ec832bb71573df7b8666b6fb832e5 Mon Sep 17 00:00:00 2001 From: telliere Date: Tue, 19 Mar 2024 17:01:37 +0200 Subject: [PATCH 03/18] first attempt to dockerize server and job preparation --- client/job_preparation/Dockerfile | 29 ++++++++++++++++++++ client/job_preparation/requirements.txt | 8 ++++++ server/Dockerfile | 31 ++++++++++++++++++++++ server/entrypoint.sh | 35 +++++++++++++++++++++++++ server/lib/spire_interactions.py | 6 +++-- server/requirements.txt | 6 ++--- 6 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 client/job_preparation/Dockerfile create mode 100644 client/job_preparation/requirements.txt create mode 100644 server/Dockerfile create mode 100755 server/entrypoint.sh diff --git a/client/job_preparation/Dockerfile b/client/job_preparation/Dockerfile new file mode 100644 index 0000000..a5ee06b --- /dev/null +++ b/client/job_preparation/Dockerfile @@ -0,0 +1,29 @@ +# Using Python original Docker image +FROM --platform=linux/amd64 python:3.9-alpine + +# Install necessary packages +RUN apk add \ + curl \ + build-base \ + libffi-dev + +RUN curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup.sh -y +ENV PATH="$PATH:/root/.cargo/bin" + +# Create code directory, output directory +RUN mkdir /job_preparation + +# Copy useful data from the project +COPY ./client/job_preparation /job_preparation + +# Copy utils for SPIFFEID creation ... +COPY ./utils /job_preparation/utils + +# Install dependencies +RUN cd /job_preparation && pip install -r ./requirements.txt + +# Set workdir +WORKDIR /job_preparation + +# Set entrypoint +ENTRYPOINT [ "python3", "./prepare_job.py" ] \ No newline at end of file diff --git a/client/job_preparation/requirements.txt b/client/job_preparation/requirements.txt new file mode 100644 index 0000000..3e89ab2 --- /dev/null +++ b/client/job_preparation/requirements.txt @@ -0,0 +1,8 @@ +cryptography==42.0.5 +pyOpenSSL==24.0.0 +protobuf==3.20.0 +pyyaml==5.3.1 +pyrage==1.1.2 +paramiko==3.4.0 +scp==0.14.5 +pre-commit diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..772803c --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,31 @@ +# Using Python original Docker image +FROM --platform=linux/amd64 python:3.9-alpine + +RUN apk add \ + git \ + build-base \ + openssl + +# Install spire-agent +RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz +RUN tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire +RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent + +# Install pyspiffe package +RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git + +# Copy server +RUN mkdir /server +COPY ./server /server + +# Install dependencies +RUN cd /server && pip install -r ./requirements.txt + +# Copy utils +COPY ./utils /server/utils + +# Set workdir +WORKDIR /server + +# Set entrypoint +ENTRYPOINT [ "./entrypoint.sh" ] \ No newline at end of file diff --git a/server/entrypoint.sh b/server/entrypoint.sh new file mode 100755 index 0000000..93088da --- /dev/null +++ b/server/entrypoint.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +## This entrypoint wraps the HPCS server with a spire agent +# + +# export PYTHONPATH="${PYTHONPATH}:/server:/utils" + +# Cleanup spire-agent generated files +end_entrypoint() { + echo "Cleaning everything before leaving ..." + rm -rf /tmp/data + rm -r /tmp/spire-agent + kill "$1" + exit "$2" +} + +# Reset spire data everytime +rm -rf /tmp/data + +# Spawn spire agent with mounted configuration +spire-agent run -config /tmp/agent.conf || end_entrypoint 0 1 & +spire_agent_pid=$! + +agent_socket_path=$(cat /tmp/agent.conf | grep "socket_path" | cut -d "=" -f2 | cut -d "\"" -f1) + +sleep 10 +until [ -e $agent_socket_path ] +do + echo -e "${RED}[LUMI-SD][Data preparation] Spire workload api socket doesn't exist, waiting 10 seconds ${NC}" + sleep 10 +done + +python3 ./app.py || end_entrypoint $spire_agent_pid 1 + +end_entrypoint $spire_agent_pid 0 \ No newline at end of file diff --git a/server/lib/spire_interactions.py b/server/lib/spire_interactions.py index e6b6fce..f369ef8 100644 --- a/server/lib/spire_interactions.py +++ b/server/lib/spire_interactions.py @@ -8,8 +8,10 @@ pre_command = "microk8s.kubectl exec -n spire spire-server-0 --" -jwt_workload_api = default_jwt_source.DefaultJwtSource( - spiffe_socket_path="unix:///tmp/spire-agent/public/api.sock" +jwt_workload_api = default_jwt_source.DefaultJwtSource( + workload_api_client=None, + spiffe_socket_path="unix:///tmp/spire-agent/public/api.sock", + timeout_in_seconds=None ) diff --git a/server/requirements.txt b/server/requirements.txt index 13e73f2..de47add 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -1,7 +1,5 @@ -cryptography==2.8 -dockerfile_parse==2.0.1 -pyOpenSSL==19.0.0 -docker==7.0.0 +cryptography==42.0.5 +pyOpenSSL==24.0.0 protobuf==3.20.0 hvac==2.1.0 quart==0.19.4 From 427341d51522dc901d08bf5cb3cdae79c6db5d57 Mon Sep 17 00:00:00 2001 From: telliere Date: Wed, 20 Mar 2024 10:24:18 +0200 Subject: [PATCH 04/18] creating actions to publish HPCS server image --- .github/workflows/build-server-image.yml | 40 ++++++++++++++++++++++++ server/Dockerfile | 1 + 2 files changed, 41 insertions(+) create mode 100644 .github/workflows/build-server-image.yml diff --git a/.github/workflows/build-server-image.yml b/.github/workflows/build-server-image.yml new file mode 100644 index 0000000..03b656c --- /dev/null +++ b/.github/workflows/build-server-image.yml @@ -0,0 +1,40 @@ +name: Publish server docker image +on: [push] + +env: + IMAGE_NAME: hpcs/server + +jobs: + push: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Build image + run: docker build . -f ./server/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + + - name: Log in to registry + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin + + - name: Push image + run: | + IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME + + # This changes all uppercase characters to lowercase. + IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') + + # This strips the git ref prefix from the version. + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + + # This strips the "v" prefix from the tag name. + [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') + + # This uses the Docker `latest` tag convention. + [ "$VERSION" == "main" ] && VERSION=latest + echo IMAGE_ID=$IMAGE_ID + echo VERSION=$VERSION + docker tag $IMAGE_NAME $IMAGE_ID:$VERSION + docker push $IMAGE_ID:$VERSION \ No newline at end of file diff --git a/server/Dockerfile b/server/Dockerfile index 772803c..fd1aa1b 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -10,6 +10,7 @@ RUN apk add \ RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz RUN tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent +RUN ln -s /opt/spire/bin/spire-server /usr/bin/spire-server # Install pyspiffe package RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git From b5c42e99b06ab8dc5d84e7053d48df3356afa993 Mon Sep 17 00:00:00 2001 From: telliere Date: Wed, 20 Mar 2024 11:39:18 +0200 Subject: [PATCH 05/18] modifying vault utils, separating the client + introducing cli and config for server --- server/app.py | 18 +++++++++++------- server/tools/cli/cli.py | 20 ++++++++++++++++++++ server/tools/config/config.py | 19 +++++++++++++++++++ utils/ship_a_key.py | 4 ++-- utils/vault/vault_utils.py | 19 +++++++++---------- 5 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 server/tools/cli/cli.py create mode 100644 server/tools/config/config.py diff --git a/server/app.py b/server/app.py index 53105f8..2c9c96f 100644 --- a/server/app.py +++ b/server/app.py @@ -9,7 +9,8 @@ from tools.docker_utils import get_build_env_image_digests from pyspiffe.spiffe_id.spiffe_id import SpiffeId -sys.path.append(os.path.expanduser("../")) +from tools.config.config import parse_configuration +from tools.cli.cli import parse_arguments from utils.vault.vault_utils import ( vault_login, write_client_policy, @@ -20,11 +21,14 @@ app = Quart(__name__) +options = parse_arguments() +configuration = parse_configuration(options.config) + # Defining the trust domain (SPIRE Trust Domain) -trust_domain = "lumi-sd-dev" +trust_domain = configuration['spire-server']['trust-domain'] # Perform vault login, to be able to run later operations against vault -vault_login(get_server_identity_JWT(), "lumi-sd-server") +hvac_client = vault_login(configuration['vault']['url'], get_server_identity_JWT(), configuration['vault']['server-role']) # Dummy endpoint that handles the registration of compute nodes. @@ -73,7 +77,7 @@ async def handle_client_registration(): client_id = hashlib.sha256(client_id.encode()).hexdigest()[0:9] # Write a policy to the vault to authorize the client to write secrets - write_client_policy(f"client_{client_id}") + write_client_policy(hvac_client, f"client_{client_id}") # Create spiffeID out of this client id agent_spiffeID = SpiffeId.parse(f"spiffe://{trust_domain}/c/{client_id}") @@ -93,7 +97,7 @@ async def handle_client_registration(): ) # Write the role bound to the workload's spiffeID - write_client_role(f"client_{client_id}", workload_spiffeID) + write_client_role(hvac_client, f"client_{client_id}", workload_spiffeID) # For each authorized container preparation process (Here, a list of docker container_preaparation image names) for digest in get_build_env_image_digests(): @@ -223,10 +227,10 @@ async def handle_workload_creation(): compute_nodes_added[compute_node]["groups"] = groups_added # Generate and create a policy that gives read-only access to the application's secret - write_user_policy(f"client_{client_id}", data["secret"]) + write_user_policy(hvac_client, f"client_{client_id}", data["secret"]) # Generate and create a role bound to the policy and to the spiffeID - write_user_role(f"client_{client_id}", data["secret"], spiffeID) + write_user_role(hvac_client, f"client_{client_id}", data["secret"], spiffeID) # Success return { diff --git a/server/tools/cli/cli.py b/server/tools/cli/cli.py new file mode 100644 index 0000000..15f3eef --- /dev/null +++ b/server/tools/cli/cli.py @@ -0,0 +1,20 @@ +import argparse + +# Parse arguments from the cli +def parse_arguments(): + """Parse arguments from cli + + Returns: + ArgumentParser: the ArgumentParser produced + """ + parser = argparse.ArgumentParser(description="CLI Optinons") + + parser.add_argument( + "--config", + "-c", + type=str, + default="/tmp/hpcs-server.conf", + help="Configuration file (INI Format) (default: /tmp/hpcs-server.conf)", + ) + + return parser.parse_args() \ No newline at end of file diff --git a/server/tools/config/config.py b/server/tools/config/config.py new file mode 100644 index 0000000..98d722b --- /dev/null +++ b/server/tools/config/config.py @@ -0,0 +1,19 @@ +from configparser import ConfigParser, NoSectionError, NoOptionError + +def parse_configuration(path : str): + config = ConfigParser() + config.read(path) + + if not 'spire-server' in config: + raise NoSectionError("spire-server section missing, aborting") + + if not 'vault' in config: + raise NoSectionError("vault section missing, aborting") + + if not 'address' in config['spire-server'] or not 'port' in config['spire-server'] or not 'trust-domain' in config['spire-server']: + raise NoOptionError("'spire-server' section is incomplete, aborting") + + if not 'url' in config['vault'] or not 'server-role' in config['vault']: + raise NoOptionError("'vault' section is incomplete, aborting") + + return config \ No newline at end of file diff --git a/utils/ship_a_key.py b/utils/ship_a_key.py index 7ebe1e4..affce34 100644 --- a/utils/ship_a_key.py +++ b/utils/ship_a_key.py @@ -285,7 +285,7 @@ def create_authorized_workloads( ) # Login to the vault using client's certificate - vault_login(SVID, f"client_{client_id}") + hvac_client = vault_login(SVID, f"client_{client_id}") # Prepare secret secret = {} @@ -293,7 +293,7 @@ def create_authorized_workloads( secret["key"] = pem.read() # Write secret to the vault - write_secret(secrets_path, secret) + write_secret(hvac_client, secrets_path, secret) print( f"Key successfully written to the vault. Users needs the role {user_role} to access the secret stored at {secrets_path}" diff --git a/utils/vault/vault_utils.py b/utils/vault/vault_utils.py index a4cdecb..e05e843 100644 --- a/utils/vault/vault_utils.py +++ b/utils/vault/vault_utils.py @@ -2,20 +2,19 @@ from pyspiffe.svid.jwt_svid import JwtSvid from pyspiffe.spiffe_id.spiffe_id import SpiffeId -client = hvac.Client(url="") - - -def vault_login(SVID: JwtSvid, client_id): +def vault_login(url : str, SVID: JwtSvid, client_id) -> hvac.Client : """Login to vault Args: SVID (JwtSvid): The client's certificate to perform mTLS via OIDC client_id (str): client's id, which happens to be the name of the role bound to the client """ - return client.auth.jwt.jwt_login(role=client_id, jwt=SVID.token) + client = hvac.Client(url=url) + client.auth.jwt.jwt_login(role=client_id, jwt=SVID.token) + return client -def write_client_policy(client_id: str): +def write_client_policy(client : hvac.Client, client_id: str): """Write a client write-only policy to vault Args: @@ -30,7 +29,7 @@ def write_client_policy(client_id: str): return client.sys.create_or_update_acl_policy(name=f"{client_id}", policy=policy) -def write_client_role(client_id: str, spiffeID: SpiffeId): +def write_client_role(client : hvac.Client, client_id: str, spiffeID: SpiffeId): """Write a client role, mapping a "clientID" named role to a spiffeID Args: @@ -48,7 +47,7 @@ def write_client_role(client_id: str, spiffeID: SpiffeId): ) -def write_user_policy(client_id: str, application: str): +def write_user_policy(client : hvac.Client, client_id: str, application: str): """Write a user read-only policy to vault Args: @@ -66,7 +65,7 @@ def write_user_policy(client_id: str, application: str): ) -def write_user_role(client_id: str, application: str, spiffeID: SpiffeId): +def write_user_role(client : hvac.Client, client_id: str, application: str, spiffeID: SpiffeId): """Write a user role bounding a spiffeID to the read-only policy accessing the client's secret Args: @@ -85,7 +84,7 @@ def write_user_role(client_id: str, application: str, spiffeID: SpiffeId): ) -def write_secret(secrets_path: str, secret: any): +def write_secret(client : hvac.Client, secrets_path: str, secret: any): """Write a secret to the vault Args: From 1352e53c73898a89fa14667802448e3e73c8cc21 Mon Sep 17 00:00:00 2001 From: telliere Date: Wed, 20 Mar 2024 12:37:30 +0200 Subject: [PATCH 06/18] adapting server configuration to read eventual modifications of spire cli --- server/app.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/app.py b/server/app.py index 2c9c96f..eea6007 100644 --- a/server/app.py +++ b/server/app.py @@ -6,6 +6,7 @@ get_server_identity_JWT, validate_client_JWT_SVID, ) +from lib import spire_interactions from tools.docker_utils import get_build_env_image_digests from pyspiffe.spiffe_id.spiffe_id import SpiffeId @@ -24,6 +25,12 @@ options = parse_arguments() configuration = parse_configuration(options.config) +if configuration['spire-server'].get('spire-server-bin') : + spire_interactions.spire_server_bin = configuration['spire-server']['spire-server-bin'] + +if configuration['spire-server'].get('pre-command') : + spire_interactions.pre_command = configuration['spire-server']['pre-command'] + # Defining the trust domain (SPIRE Trust Domain) trust_domain = configuration['spire-server']['trust-domain'] From 8bfb667d2e99b67dcf3e63f159749f3c5a97410f Mon Sep 17 00:00:00 2001 From: telliere Date: Wed, 20 Mar 2024 14:52:18 +0200 Subject: [PATCH 07/18] cloning server's action for other containers --- .../workflows/build-container-prep-image.yml | 40 +++++++++++++++++++ .github/workflows/build-data-prep-image.yml | 40 +++++++++++++++++++ .github/workflows/build-job-prep-image.yml | 40 +++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 .github/workflows/build-container-prep-image.yml create mode 100644 .github/workflows/build-data-prep-image.yml create mode 100644 .github/workflows/build-job-prep-image.yml diff --git a/.github/workflows/build-container-prep-image.yml b/.github/workflows/build-container-prep-image.yml new file mode 100644 index 0000000..2b35eb6 --- /dev/null +++ b/.github/workflows/build-container-prep-image.yml @@ -0,0 +1,40 @@ +name: Build and publish HPCS container preparation image +on: [push] + +env: + IMAGE_NAME: hpcs/data-prep + +jobs: + push: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Build image + run: docker build . -f ./client/container_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + + - name: Log in to registry + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin + + - name: Push image + run: | + IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME + + # This changes all uppercase characters to lowercase. + IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') + + # This strips the git ref prefix from the version. + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + + # This strips the "v" prefix from the tag name. + [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') + + # This uses the Docker `latest` tag convention. + [ "$VERSION" == "main" ] && VERSION=latest + echo IMAGE_ID=$IMAGE_ID + echo VERSION=$VERSION + docker tag $IMAGE_NAME $IMAGE_ID:$VERSION + docker push $IMAGE_ID:$VERSION \ No newline at end of file diff --git a/.github/workflows/build-data-prep-image.yml b/.github/workflows/build-data-prep-image.yml new file mode 100644 index 0000000..6c169ef --- /dev/null +++ b/.github/workflows/build-data-prep-image.yml @@ -0,0 +1,40 @@ +name: Build and publish HPCS data preparation image +on: [push] + +env: + IMAGE_NAME: hpcs/data-prep + +jobs: + push: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Build image + run: docker build . -f ./client/data_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + + - name: Log in to registry + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin + + - name: Push image + run: | + IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME + + # This changes all uppercase characters to lowercase. + IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') + + # This strips the git ref prefix from the version. + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + + # This strips the "v" prefix from the tag name. + [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') + + # This uses the Docker `latest` tag convention. + [ "$VERSION" == "main" ] && VERSION=latest + echo IMAGE_ID=$IMAGE_ID + echo VERSION=$VERSION + docker tag $IMAGE_NAME $IMAGE_ID:$VERSION + docker push $IMAGE_ID:$VERSION \ No newline at end of file diff --git a/.github/workflows/build-job-prep-image.yml b/.github/workflows/build-job-prep-image.yml new file mode 100644 index 0000000..1c6c2b2 --- /dev/null +++ b/.github/workflows/build-job-prep-image.yml @@ -0,0 +1,40 @@ +name: Build and publish HPCS jon preparation image +on: [push] + +env: + IMAGE_NAME: hpcs/data-prep + +jobs: + push: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Build image + run: docker build . -f ./client/job_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + + - name: Log in to registry + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin + + - name: Push image + run: | + IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME + + # This changes all uppercase characters to lowercase. + IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') + + # This strips the git ref prefix from the version. + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + + # This strips the "v" prefix from the tag name. + [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') + + # This uses the Docker `latest` tag convention. + [ "$VERSION" == "main" ] && VERSION=latest + echo IMAGE_ID=$IMAGE_ID + echo VERSION=$VERSION + docker tag $IMAGE_NAME $IMAGE_ID:$VERSION + docker push $IMAGE_ID:$VERSION \ No newline at end of file From ee6b5a8f7edcc179d78a69d4bbfc70f714754a3f Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 11:47:01 +0200 Subject: [PATCH 08/18] introducing configuration file for ship_a_key.py --- client/container_preparation/entrypoint.sh | 9 ++-- client/data_preparation/entrypoint.sh | 9 ++-- utils/ship_a_key.py | 56 ++++++++++++++-------- utils/ssh_utils.py | 6 ++- 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/client/container_preparation/entrypoint.sh b/client/container_preparation/entrypoint.sh index 56d4c74..099532f 100755 --- a/client/container_preparation/entrypoint.sh +++ b/client/container_preparation/entrypoint.sh @@ -10,6 +10,7 @@ docker_path="/var/run/docker.sock" parse_args() { while [[ "$#" -gt 0 ]]; do case "$1" in + --config) config="$2"; shift 2 ;; -b|--base-oci-image) base_oci_image="$2"; shift 2 ;; -s|--sif-path) sif_path="$2"; shift 2 ;; -e|--encrypted) encrypted=true; shift ;; @@ -26,7 +27,7 @@ parse_args() { done # Check for required arguments - if [ -z "$base_oci_image" ] || [ -z "$sif_path" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || ( [ -z "$users" ] && [ -z "$groups" ] ) || [ -z "$compute_nodes" ]; then + if [ -z "$config" ] || [ -z "$base_oci_image" ] || [ -z "$sif_path" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || ( [ -z "$users" ] && [ -z "$groups" ] ) || [ -z "$compute_nodes" ]; then echo echo "Please provides options for both of these programs : " python3 ./prepare_container.py --help python3 ./utils/ship_a_key.py --help @@ -110,13 +111,13 @@ else if [ -z "$users" ]; then # If the user provided only groups - python3 ./utils/ship_a_key.py --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config $config --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 elif [ -z "$groups" ] ; then # If the user provided only users - python3 ./utils/ship_a_key.py --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 else # If the user provided both - python3 ./utils/ship_a_key.py --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 fi echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Key written to the vault" diff --git a/client/data_preparation/entrypoint.sh b/client/data_preparation/entrypoint.sh index 1627cee..ffbb7f8 100755 --- a/client/data_preparation/entrypoint.sh +++ b/client/data_preparation/entrypoint.sh @@ -7,6 +7,7 @@ parse_args() { while [[ "$#" -gt 0 ]]; do case "$1" in + --config) config="$2"; shift 2 ;; -i|--input-data) input_data="$2"; shift 2 ;; -o|--output-data) output_data="$2"; shift 2 ;; --data-path) data_path="$2"; shift 2 ;; @@ -21,7 +22,7 @@ parse_args() { done # Check for required arguments - if [ -z "$input_data" ] || [ -z "$output_data" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || [ -z "$username" ] || ( [ -z "$users" ] && [ -z "$groups" ] ) || [ -z "$compute_nodes" ]; then + if [ -z "$config" ] || [ -z "$input_data" ] || [ -z "$output_data" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || [ -z "$username" ] || ( [ -z "$users" ] && [ -z "$groups" ] ) || [ -z "$compute_nodes" ]; then echo echo "Please provides options for both of these programs : " python3 ./prepare_data.py --help python3 ./utils/ship_a_key.py --help @@ -96,13 +97,13 @@ echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Writing key to th # Handle different cases of user provided compute nodes / user / groups if [ -z "$users" ]; then # If the user provided only groups - python3 ./utils/ship_a_key.py --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config $config --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 elif [ -z "$groups" ] ; then # If the user provided only users - python3 ./utils/ship_a_key.py --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 else # If the user provided both - python3 ./utils/ship_a_key.py --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 fi echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Key written to the vault" diff --git a/utils/ship_a_key.py b/utils/ship_a_key.py index affce34..d56e5da 100644 --- a/utils/ship_a_key.py +++ b/utils/ship_a_key.py @@ -11,6 +11,8 @@ from hashlib import sha512 from ssh_utils import ssh_connect, ssh_copy_file +from configparser import ConfigParser, NoSectionError, NoOptionError + # Provide client_id from cli$ # Same for trust domain # Get image id and transform as for server @@ -26,6 +28,11 @@ def parse_arguments() -> argparse.ArgumentParser: """ parser = argparse.ArgumentParser(description="CLI Options") + parser.add_argument( + "--config", + required=True, + help="Path to the client configuration file", + ) parser.add_argument( "--users", "-u", @@ -82,19 +89,6 @@ def parse_arguments() -> argparse.ArgumentParser: type=str, help="Path to write the dataset on the supercomputer storage default :", ) - parser.add_argument( - "--sd-server-address", - "-a", - type=str, - help="Server address", - ) - parser.add_argument( - "--sd-server-port", - "-ap", - type=int, - default=10080, - help="SD API server port (default: 10080)", - ) parser.add_argument( "--username", required=True, @@ -103,6 +97,25 @@ def parse_arguments() -> argparse.ArgumentParser: return parser.parse_args() +# Parse configuration file +def parse_configuration(path : str): + config = ConfigParser() + config.read(path) + + if not 'hpcs-server' in config: + raise NoSectionError("hpcs-server section missing in configuration file, aborting") + + if not 'vault' in config: + raise NoSectionError("vault section missing in configuration file, aborting") + + if not 'url' in config['hpcs-server']: + raise NoOptionError("'hpcs-server' section is incomplete in configuration file, aborting") + + if not 'url' in config['vault']: + raise NoOptionError("'vault' section is incomplete in configuration file, aborting") + + return config + def validate_options(options: argparse.ArgumentParser): """Check for the cli-provided options @@ -194,7 +207,7 @@ def validate_options(options: argparse.ArgumentParser): def create_authorized_workloads( - SVID: JwtSvid, secret, server, port, users, groups, compute_nodes + SVID: JwtSvid, secret, url, users, groups, compute_nodes ): """Create workloads that are authorized to access to a secret @@ -212,7 +225,7 @@ def create_authorized_workloads( """ # Prepare request - url = f"http://{server}:{port}/api/client/create-workloads" + url = f"{url}/api/client/create-workloads" payload = { "jwt": SVID.token, "secret": secret, @@ -248,7 +261,9 @@ def create_authorized_workloads( if __name__ == "__main__": # Parse arguments from CLI - options = parse_arguments() + options = parse_arguments() + # Parse configuration file + configuration = parse_configuration(options.config) # Validate / Parse them ( @@ -277,15 +292,14 @@ def create_authorized_workloads( users_spiffeID, client_id, secrets_path, user_role = create_authorized_workloads( SVID, secret_name, - options.sd_server_address, - options.sd_server_port, + configuration["hpcs-server"]["url"], users, groups, compute_nodes, ) # Login to the vault using client's certificate - hvac_client = vault_login(SVID, f"client_{client_id}") + hvac_client = vault_login(configuration["vault"]["url"], SVID, f"client_{client_id}") # Prepare secret secret = {} @@ -329,11 +343,11 @@ def create_authorized_workloads( ssh_copy_file( ssh_client, "/tmp/dataset_info.yaml", - f"{options.data_path_at_rest}{secret_name}.info.yaml", + f"{options.data_path_at_rest}/{secret_name}.info.yaml", ) print( - f"Data and info file were shipped to te supercomputer. Infos about the dataset are available at {options.data_path_at_rest}/{secret_name}.info.yaml" + f"Data and info file were shipped to te supercomputer. Info about the dataset are available at {options.data_path_at_rest}/{secret_name}.info.yaml" ) ssh_client.close() diff --git a/utils/ssh_utils.py b/utils/ssh_utils.py index ade6b5f..90eb3d6 100644 --- a/utils/ssh_utils.py +++ b/utils/ssh_utils.py @@ -1,5 +1,5 @@ from paramiko.client import SSHClient -from paramiko import SSHException, AutoAddPolicy +from paramiko import SSHException, AutoAddPolicy, RSAKey from scp import SCPClient # Hostname and port configuration @@ -30,11 +30,13 @@ def ssh_connect(username: str) -> SSHClient: # Probably running in a container except SSHException: + pkey=RSAKey.from_private_key_file("/tmp/.ssh/id_rsa") client.connect( host, port, username=username, - key_filename="/tmp/.ssh/id_rsa", + pkey=pkey, + look_for_keys=False, auth_timeout=30, timeout=30, ) From 7fc234279b4b35d0d3e4ffeb9df4492f81a62f20 Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 13:24:15 +0200 Subject: [PATCH 09/18] bumping spire-agent version for 1.9.1 --- client/container_preparation/Dockerfile | 4 ++-- client/data_preparation/Dockerfile | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/client/container_preparation/Dockerfile b/client/container_preparation/Dockerfile index 6f4ccf5..ab67eea 100644 --- a/client/container_preparation/Dockerfile +++ b/client/container_preparation/Dockerfile @@ -13,8 +13,8 @@ RUN curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup. ENV PATH="$PATH:/root/.cargo/bin" # Install spire-agent -RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz -RUN tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire +RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.1/spire-1.9.1-linux-amd64-musl.tar.gz +RUN tar xvf spire-1.9.1-linux-amd64-musl.tar.gz ; mv spire-1.9.1 /opt ; mv /opt/spire-1.9.1 /opt/spire RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent # Install pyspiffe package diff --git a/client/data_preparation/Dockerfile b/client/data_preparation/Dockerfile index 205e435..fbbdea1 100644 --- a/client/data_preparation/Dockerfile +++ b/client/data_preparation/Dockerfile @@ -14,8 +14,9 @@ RUN curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup. ENV PATH="$PATH:/root/.cargo/bin" # Install spire-agent -RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz -RUN tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire + +RUN wget -q https://github.com/spiffe/spire/releases/download/v1.9.1/spire-1.9.1-linux-amd64-musl.tar.gz +RUN tar xvf spire-1.9.1-linux-amd64-musl.tar.gz ; mv spire-1.9.1 /opt ; mv /opt/spire-1.9.1 /opt/spire RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent # Install pyspiffe package From 09c91150cd4185d64b15710548bac3c9e06a4026 Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 13:28:51 +0200 Subject: [PATCH 10/18] fixing wrong name for image built names --- .github/workflows/build-container-prep-image.yml | 2 +- .github/workflows/build-job-prep-image.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-container-prep-image.yml b/.github/workflows/build-container-prep-image.yml index 2b35eb6..3f8bb74 100644 --- a/.github/workflows/build-container-prep-image.yml +++ b/.github/workflows/build-container-prep-image.yml @@ -2,7 +2,7 @@ name: Build and publish HPCS container preparation image on: [push] env: - IMAGE_NAME: hpcs/data-prep + IMAGE_NAME: hpcs/container-prep jobs: push: diff --git a/.github/workflows/build-job-prep-image.yml b/.github/workflows/build-job-prep-image.yml index 1c6c2b2..9516567 100644 --- a/.github/workflows/build-job-prep-image.yml +++ b/.github/workflows/build-job-prep-image.yml @@ -2,7 +2,7 @@ name: Build and publish HPCS jon preparation image on: [push] env: - IMAGE_NAME: hpcs/data-prep + IMAGE_NAME: hpcs/job-prep jobs: push: From b83f951c5226a5e8d776f6ccc9baec7ce60f0110 Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 16:32:31 +0200 Subject: [PATCH 11/18] adding the config file to spawn-agent, refactoring for common usage + typo --- .github/workflows/build-job-prep-image.yml | 2 +- .github/workflows/build-server-image.yml | 2 +- client/container_preparation/entrypoint.sh | 5 ++- client/data_preparation/entrypoint.sh | 7 +++- utils/conf/client/conf.py | 26 ++++++++++++ utils/ship_a_key.py | 23 +---------- utils/spawn_agent.py | 47 +++++++--------------- 7 files changed, 54 insertions(+), 58 deletions(-) create mode 100644 utils/conf/client/conf.py diff --git a/.github/workflows/build-job-prep-image.yml b/.github/workflows/build-job-prep-image.yml index 9516567..3a137b9 100644 --- a/.github/workflows/build-job-prep-image.yml +++ b/.github/workflows/build-job-prep-image.yml @@ -1,4 +1,4 @@ -name: Build and publish HPCS jon preparation image +name: Build and publish HPCS job preparation image on: [push] env: diff --git a/.github/workflows/build-server-image.yml b/.github/workflows/build-server-image.yml index 03b656c..d7c1e76 100644 --- a/.github/workflows/build-server-image.yml +++ b/.github/workflows/build-server-image.yml @@ -1,4 +1,4 @@ -name: Publish server docker image +name: Build and publish HPCS server image on: [push] env: diff --git a/client/container_preparation/entrypoint.sh b/client/container_preparation/entrypoint.sh index 099532f..73bb07b 100755 --- a/client/container_preparation/entrypoint.sh +++ b/client/container_preparation/entrypoint.sh @@ -67,11 +67,14 @@ echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Entering ent if [ -n "$encrypted" ]; then echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is on. Registering and running SPIRE Agent" - python3 ./utils/spawn_agent.py > /dev/null 2> /dev/null || exit 1 & + python3 ./utils/spawn_agent.py --config $config > /dev/null 2> /dev/null & spire_agent_pid=$! fi + +ps -p $spire_agent_pid > /dev/null || ( echo "spire agent died, aborting" ; end_entrypoint "$spire_agent_pid" 1) + # ## [END] Perform node attestation # diff --git a/client/data_preparation/entrypoint.sh b/client/data_preparation/entrypoint.sh index ffbb7f8..30cba89 100755 --- a/client/data_preparation/entrypoint.sh +++ b/client/data_preparation/entrypoint.sh @@ -58,15 +58,20 @@ echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Entering entrypoi echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Registering and running SPIRE Agent" -python3 ./utils/spawn_agent.py > /dev/null 2> /dev/null || exit 1 & +python3 ./utils/spawn_agent.py --config $config > /dev/null 2> /dev/null & spire_agent_pid=$! until [ -e /tmp/agent.sock ] do echo -e "${RED}[LUMI-SD][Data preparation] Spire workload api socket doesn't exist, waiting 10 seconds ${NC}" sleep 10 + if ! ps | grep $spire_agent_pid > /dev/null ; then + echo "spire agent died, aborting" + end_entrypoint "$spire_agent_pid" 1 + fi done + # ## [END] Perform node attestation # diff --git a/utils/conf/client/conf.py b/utils/conf/client/conf.py new file mode 100644 index 0000000..c85521e --- /dev/null +++ b/utils/conf/client/conf.py @@ -0,0 +1,26 @@ +# Parse configuration file +from configparser import ConfigParser, NoSectionError, NoOptionError + +def parse_configuration(path : str): + config = ConfigParser() + config.read(path) + + if not 'spire-server' in config: + raise NoSectionError("hpcs-server section missing in configuration file, aborting") + + if not 'hpcs-server' in config: + raise NoSectionError("hpcs-server section missing in configuration file, aborting") + + if not 'vault' in config: + raise NoSectionError("vault section missing in configuration file, aborting") + + if not 'address' in config['spire-server'] or not 'port' in config['spire-server'] or not 'trust-domain' in config['spire-server']: + raise NoOptionError("'spire-server' section is incomplete in configuration file, aborting") + + if not 'url' in config['hpcs-server']: + raise NoOptionError("'hpcs-server' section is incomplete in configuration file, aborting") + + if not 'url' in config['vault']: + raise NoOptionError("'vault' section is incomplete in configuration file, aborting") + + return config \ No newline at end of file diff --git a/utils/ship_a_key.py b/utils/ship_a_key.py index d56e5da..4587d26 100644 --- a/utils/ship_a_key.py +++ b/utils/ship_a_key.py @@ -10,8 +10,7 @@ import yaml from hashlib import sha512 from ssh_utils import ssh_connect, ssh_copy_file - -from configparser import ConfigParser, NoSectionError, NoOptionError +from conf.client.conf import parse_configuration # Provide client_id from cli$ # Same for trust domain @@ -97,25 +96,6 @@ def parse_arguments() -> argparse.ArgumentParser: return parser.parse_args() -# Parse configuration file -def parse_configuration(path : str): - config = ConfigParser() - config.read(path) - - if not 'hpcs-server' in config: - raise NoSectionError("hpcs-server section missing in configuration file, aborting") - - if not 'vault' in config: - raise NoSectionError("vault section missing in configuration file, aborting") - - if not 'url' in config['hpcs-server']: - raise NoOptionError("'hpcs-server' section is incomplete in configuration file, aborting") - - if not 'url' in config['vault']: - raise NoOptionError("'vault' section is incomplete in configuration file, aborting") - - return config - def validate_options(options: argparse.ArgumentParser): """Check for the cli-provided options @@ -262,6 +242,7 @@ def create_authorized_workloads( if __name__ == "__main__": # Parse arguments from CLI options = parse_arguments() + # Parse configuration file configuration = parse_configuration(options.config) diff --git a/utils/spawn_agent.py b/utils/spawn_agent.py index efc5624..df2cb27 100644 --- a/utils/spawn_agent.py +++ b/utils/spawn_agent.py @@ -1,4 +1,5 @@ import platform, argparse, subprocess, requests +from conf.client.conf import parse_configuration # Parse arguments from the cli @@ -11,31 +12,9 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="CLI Optinons") parser.add_argument( - "--spire-trust-domain", - "-t", - type=str, - default="lumi-sd-dev", - help="Server address (default: lumi-sd-dev)", - ) - parser.add_argument( - "--sd-server-address", - "-a", - type=str, - help="Server address", - ) - parser.add_argument( - "--spire-server-port", - "-sp", - type=int, - default=10081, - help="Spire server port (default: 10081)", - ) - parser.add_argument( - "--sd-server-port", - "-ap", - type=int, - default=10080, - help="SD API server port (default: 10080)", + "--config", + required=True, + help="Path to the client configuration file", ) parser.add_argument( "--socketpath", @@ -54,8 +33,7 @@ def parse_arguments(): return parser.parse_args() - -def get_token(server, port, compute_node_token: bool): +def get_token(url, compute_node_token: bool): """Get joinToken to perform node registration from server Args: @@ -73,9 +51,9 @@ def get_token(server, port, compute_node_token: bool): # Check wether we are performing compute node attestation or client attestation, create url if compute_node_token: hostname = platform.node() - url = f"http://{server}:{port}/api/agents/token?hostname={hostname}" + url = f"{url}/api/agents/token?hostname={hostname}" else: - url = f"http://{server}:{port}/api/client/register" + url = f"{url}/api/client/register" # Perform POST request to SD server response = requests.post(url) @@ -89,22 +67,25 @@ def get_token(server, port, compute_node_token: bool): if __name__ == "__main__": # Get arguments options = parse_arguments() + + # Parse configuration file + configuration = parse_configuration(options.config) # Get token from API token = get_token( - options.sd_server_address, options.sd_server_port, options.compute_node + configuration['hpcs-server']['url'], options.compute_node ) # Overwrite configuration template agent_configuration_template = open("./utils/agent-on-the-fly.conf").read() agent_configuration_template = agent_configuration_template.replace( - "SPIRE_TRUST_DOMAIN", options.spire_trust_domain + "SPIRE_TRUST_DOMAIN", configuration['spire-server']['trust-domain'] ) agent_configuration_template = agent_configuration_template.replace( - "SPIRE_SERVER_ADDRESS", options.sd_server_address + "SPIRE_SERVER_ADDRESS", configuration['spire-server']['address'] ) agent_configuration_template = agent_configuration_template.replace( - "SPIRE_SERVER_PORT", str(options.spire_server_port) + "SPIRE_SERVER_PORT", configuration['spire-server']['port'] ) agent_configuration_template = agent_configuration_template.replace( "SOCKETPATH", options.socketpath From def9ea0fa1cef59ab3cb53aff08423da1c6b00f8 Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 17:21:38 +0200 Subject: [PATCH 12/18] trying to find a workaround to spire not supporting cgroupv2 --- client/container_preparation/entrypoint.sh | 2 +- server/tools/digests | 5 +++-- utils/agent-on-the-fly.conf | 7 ++++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/client/container_preparation/entrypoint.sh b/client/container_preparation/entrypoint.sh index 73bb07b..5176529 100755 --- a/client/container_preparation/entrypoint.sh +++ b/client/container_preparation/entrypoint.sh @@ -73,7 +73,7 @@ if [ -n "$encrypted" ]; then fi -ps -p $spire_agent_pid > /dev/null || ( echo "spire agent died, aborting" ; end_entrypoint "$spire_agent_pid" 1) +ps $spire_agent_pid > /dev/null || ( echo "spire agent died, aborting" ; end_entrypoint "$spire_agent_pid" 1) # ## [END] Perform node attestation diff --git a/server/tools/digests b/server/tools/digests index e820988..8d3cd12 100644 --- a/server/tools/digests +++ b/server/tools/digests @@ -1,2 +1,3 @@ -lumi-sd/data_prep:latest -sd-container/client:latest +ghcr.io/cscfi/hpcs/data-prep:dockerfile_everywhere +ghcr.io/cscfi/hpcs/container-prep:dockerfile_everywhere +ghcr.io/cscfi/hpcs/job-prep:dockerfile_everywhere diff --git a/utils/agent-on-the-fly.conf b/utils/agent-on-the-fly.conf index 98984d3..93f2d97 100644 --- a/utils/agent-on-the-fly.conf +++ b/utils/agent-on-the-fly.conf @@ -24,7 +24,12 @@ plugins { } WorkloadAttestor "unix" { - plugin_data {} + plugin_data { + discover_workload_path = true + } + } + + WorkloadAttestor "systemd" { } WorkloadAttestor "docker" { From a905eb09f11d7560a3f5249d743c7bc1fa90868c Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 17:54:35 +0200 Subject: [PATCH 13/18] adding sha of spire-agent to the authorized client's workloads --- server/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/app.py b/server/app.py index eea6007..82bd94d 100644 --- a/server/app.py +++ b/server/app.py @@ -126,7 +126,11 @@ async def handle_client_registration(): "client_id": client_id, "token": agent_token, } - + + result = entry_create( + agent_spiffeID, workload_spiffeID, ["unix:sha256:5ebff0fdb3335ec0221c35dcc7d3a4433eb8a5073a15a6dcfdbbb95bb8dbfa8e"] + ) + # Success return { "success": True, From 25a45db17144a259f042d021ff2cb70b5c0395c8 Mon Sep 17 00:00:00 2001 From: telliere Date: Thu, 21 Mar 2024 18:55:09 +0200 Subject: [PATCH 14/18] adapting job preparation to config file usage --- client/container_preparation/input_logic/run.sh | 4 ++-- client/job_preparation/lib/sbatch_generation.py | 3 ++- client/job_preparation/prepare_job.py | 17 ++++++++++++++++- client/job_preparation/utils/cli/cli.py | 8 ++++---- client/job_preparation/utils/sbatch.template | 10 +++++----- utils/conf/client/conf.py | 6 ++++++ utils/ship_a_key.py | 3 ++- utils/spawn_agent.py | 3 ++- 8 files changed, 39 insertions(+), 15 deletions(-) diff --git a/client/container_preparation/input_logic/run.sh b/client/container_preparation/input_logic/run.sh index c249fec..840e2e0 100755 --- a/client/container_preparation/input_logic/run.sh +++ b/client/container_preparation/input_logic/run.sh @@ -7,10 +7,10 @@ PATH="$PATH:/sd-container/tools/input_logic/" echo "[SD-Container][Input-Logic] : Getting data decryption key from vault" # Get token via vault login. The data_login environment variable need to be exported from calling script -data_token=$(curl -s --request POST --data "$data_login" http://${vault}/v1/auth/jwt/login | jq '.auth.client_token' -r) || exit 1 +data_token=$(curl -s --request POST --data "$data_login" $vault/v1/auth/jwt/login | jq '.auth.client_token' -r) || exit 1 # Use the token to access the key. The data_path environment variable needs to be exported from calling script -data_key=$(curl -s -H "X-Vault-Token: $data_token" http://${vault}/v1/kv/data/${data_path} | jq '.data.data.key' -r) || exit 1 +data_key=$(curl -s -H "X-Vault-Token: $data_token" $vault/v1/kv/data/${data_path} | jq '.data.data.key' -r) || exit 1 # Write the key in an encrypted volume echo "$data_key" > /sd-container/encrypted/decryption_key diff --git a/client/job_preparation/lib/sbatch_generation.py b/client/job_preparation/lib/sbatch_generation.py index 832c218..5d6d23d 100644 --- a/client/job_preparation/lib/sbatch_generation.py +++ b/client/job_preparation/lib/sbatch_generation.py @@ -44,7 +44,8 @@ def boostrap_from_template(options: argparse.Namespace, template_path: str) -> s sbatch = sbatch.replace("ACCOUNT", options.account) sbatch = sbatch.replace("NODELIST", options.nodelist) sbatch = sbatch.replace("WORKDIR", options.workdir) - sbatch = sbatch.replace("TRUST_DOMAIN", "lumi-sd-dev") + sbatch = sbatch.replace("TRUST_DOMAIN", options.trust_domain) + sbatch = sbatch.replace("VAULT_ADDRESS", options.vault_address) # Dataset info sbatch = sbatch.replace("DATA_PATH", options.data_path_at_rest) diff --git a/client/job_preparation/prepare_job.py b/client/job_preparation/prepare_job.py index 6ad4a32..d24513c 100644 --- a/client/job_preparation/prepare_job.py +++ b/client/job_preparation/prepare_job.py @@ -6,12 +6,24 @@ sys.path.append(os.path.expanduser("../../../")) # For cli usage sys.path.append(os.path.expanduser("../../")) # For inside-container usage from utils.ssh_utils import ssh_connect, ssh_copy_file, ssh_run_command +from utils.conf.client.conf import parse_configuration from time import sleep from pyrage import x25519 if __name__ == "__main__": # Parse arguments - options = check_arguments(parse_arguments()) + options = parse_arguments() + + # Parse configuration + configuration = parse_configuration(options.config) + + # Parse configuration as options + options.username = configuration['supercomputer']['username'] + options.trust_domain = configuration['spire-server']['trust-domain'] + options.vault_address = configuration['vault']['url'] + + # Check arguments + options = check_arguments(options) # Connect via SSH to supercomputer ssh_client = ssh_connect(options.username) @@ -39,6 +51,9 @@ # Copy SBATCH to supercomputer ssh_copy_file(ssh_client, sbatch_path, f"~/") + + # Copy config file to supercomputer + ssh_copy_file(ssh_client, options.config, f"~/.config/hpcs.conf") # Create public encryption key for output data ident = x25519.Identity.generate() diff --git a/client/job_preparation/utils/cli/cli.py b/client/job_preparation/utils/cli/cli.py index 7784937..89f00ba 100644 --- a/client/job_preparation/utils/cli/cli.py +++ b/client/job_preparation/utils/cli/cli.py @@ -11,11 +11,11 @@ def parse_arguments() -> argparse.Namespace: parser = argparse.ArgumentParser(description="CLI Optinons") parser.add_argument( - "--username", - "-u", - required=True, + "--config", type=str, - help="username on supercomputer", + required=True, + default="/tmp/hpcs-client.conf", + help="Configuration file (INI Format) (default: /tmp/hpcs-client.conf)", ) parser.add_argument( "--job-name", diff --git a/client/job_preparation/utils/sbatch.template b/client/job_preparation/utils/sbatch.template index f10642b..586f7a1 100644 --- a/client/job_preparation/utils/sbatch.template +++ b/client/job_preparation/utils/sbatch.template @@ -61,8 +61,8 @@ mkdir -p ${WORKING_DIRECTORY} echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Job]${NC} Running agent registration" # Spawn spire-agent -cd ~/LUMI-secure-processing || exit 1 -python3 ./utils/spawn_agent.py -cn > $WORKING_DIRECTORY/agent.log 2> $WORKING_DIRECTORY/agent.log & +cd ~/HPCS || exit 1 +python3 ./utils/spawn_agent.py --config ~/.config/hpcs-client.conf -cn > $WORKING_DIRECTORY/agent.log 2> $WORKING_DIRECTORY/agent.log & spire_agent_pid=$! # Wait until agent runs properly @@ -85,18 +85,18 @@ echo "Logging in to the vault ..." # Log in to the vault using SVID, access role echo "{\"role\": \"APPLICATION_ACCESS_ROLE\", \"jwt\" : \"$svid\"}" > /tmp/login -application_token=$(curl -s --request POST --data @/tmp/login http://${vault}/v1/auth/jwt/login | jq '.auth.client_token' -r) || cleanup $spire_agent_pid 1 +application_token=$(curl -s --request POST --data @/tmp/login $vault/v1/auth/jwt/login | jq '.auth.client_token' -r) || cleanup $spire_agent_pid 1 echo "Getting container decryption key ..." # Use provided vault token (from login) to access secrets -data_key=$(curl -s -H "X-Vault-Token: $application_token" http://${vault}/v1/kv/data/APPLICATION_SECRET_PATH | jq '.data.data.key' -r) || cleanup $spire_agent_pid 1 +data_key=$(curl -s -H "X-Vault-Token: $application_token" $vault/v1/kv/data/APPLICATION_SECRET_PATH | jq '.data.data.key' -r) || cleanup $spire_agent_pid 1 echo "$data_key" > /tmp/container_key echo "Decrypting container image ..." # Decrypt the container image -~/LUMI-secure-processing/client/container_preparation/input_logic/age --decrypt -i /tmp/container_key -o $WORKING_DIRECTORY/app.sif APPLICATION_PATH || exit 1 +~/HPCS/client/container_preparation/input_logic/age --decrypt -i /tmp/container_key -o $WORKING_DIRECTORY/app.sif APPLICATION_PATH || exit 1 echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Job]${NC} Creating encrypted volumes" diff --git a/utils/conf/client/conf.py b/utils/conf/client/conf.py index c85521e..f295d27 100644 --- a/utils/conf/client/conf.py +++ b/utils/conf/client/conf.py @@ -5,6 +5,9 @@ def parse_configuration(path : str): config = ConfigParser() config.read(path) + if not 'supercomputer' in config: + raise NoSectionError("supercomputer section missing in configuration file, aborting") + if not 'spire-server' in config: raise NoSectionError("hpcs-server section missing in configuration file, aborting") @@ -14,6 +17,9 @@ def parse_configuration(path : str): if not 'vault' in config: raise NoSectionError("vault section missing in configuration file, aborting") + if not 'address' in config['supercomputer'] or not 'username' in config['supercomputer']: + raise NoOptionError("'spire-server' section is incomplete in configuration file, aborting") + if not 'address' in config['spire-server'] or not 'port' in config['spire-server'] or not 'trust-domain' in config['spire-server']: raise NoOptionError("'spire-server' section is incomplete in configuration file, aborting") diff --git a/utils/ship_a_key.py b/utils/ship_a_key.py index 4587d26..1671ff8 100644 --- a/utils/ship_a_key.py +++ b/utils/ship_a_key.py @@ -30,7 +30,8 @@ def parse_arguments() -> argparse.ArgumentParser: parser.add_argument( "--config", required=True, - help="Path to the client configuration file", + default="/tmp/hpcs-client.conf", + help="Configuration file (INI Format) (default: /tmp/hpcs-client.conf)", ) parser.add_argument( "--users", diff --git a/utils/spawn_agent.py b/utils/spawn_agent.py index df2cb27..d58a2c9 100644 --- a/utils/spawn_agent.py +++ b/utils/spawn_agent.py @@ -14,7 +14,8 @@ def parse_arguments(): parser.add_argument( "--config", required=True, - help="Path to the client configuration file", + default="/tmp/hpcs-client.conf", + help="Configuration file (INI Format) (default: /tmp/hpcs-client.conf)", ) parser.add_argument( "--socketpath", From 8500e0291a31839e609a53ec6eafc9d5b1862349 Mon Sep 17 00:00:00 2001 From: telliere Date: Fri, 22 Mar 2024 11:30:29 +0200 Subject: [PATCH 15/18] updating lib usage after py-spiffe update and fixing some bugs --- .../workflows/build-container-prep-image.yml | 4 ++- .github/workflows/build-data-prep-image.yml | 4 ++- .github/workflows/build-job-prep-image.yml | 4 ++- .github/workflows/build-server-image.yml | 4 ++- client/job_preparation/prepare_job.py | 4 +-- server/app.py | 12 +++++---- server/lib/spire_interactions.py | 27 +++++++++++++------ utils/agent-on-the-fly.conf | 3 --- utils/ship_a_key.py | 4 +-- utils/ssh_utils.py | 4 +-- 10 files changed, 44 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build-container-prep-image.yml b/.github/workflows/build-container-prep-image.yml index 3f8bb74..2470adf 100644 --- a/.github/workflows/build-container-prep-image.yml +++ b/.github/workflows/build-container-prep-image.yml @@ -12,9 +12,11 @@ jobs: contents: read steps: - uses: actions/checkout@v4 + with: + lfs: 'true' - name: Build image - run: docker build . -f ./client/container_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + run: git lfs pull ; docker build . -f ./client/container_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" - name: Log in to registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin diff --git a/.github/workflows/build-data-prep-image.yml b/.github/workflows/build-data-prep-image.yml index 6c169ef..39ed48f 100644 --- a/.github/workflows/build-data-prep-image.yml +++ b/.github/workflows/build-data-prep-image.yml @@ -12,9 +12,11 @@ jobs: contents: read steps: - uses: actions/checkout@v4 + with: + lfs: 'true' - name: Build image - run: docker build . -f ./client/data_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + run: git lfs pull ; docker build . -f ./client/data_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" - name: Log in to registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin diff --git a/.github/workflows/build-job-prep-image.yml b/.github/workflows/build-job-prep-image.yml index 3a137b9..992fd00 100644 --- a/.github/workflows/build-job-prep-image.yml +++ b/.github/workflows/build-job-prep-image.yml @@ -12,9 +12,11 @@ jobs: contents: read steps: - uses: actions/checkout@v4 + with: + lfs: 'true' - name: Build image - run: docker build . -f ./client/job_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + run: git lfs pull ; docker build . -f ./client/job_preparation/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" - name: Log in to registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin diff --git a/.github/workflows/build-server-image.yml b/.github/workflows/build-server-image.yml index d7c1e76..09f701d 100644 --- a/.github/workflows/build-server-image.yml +++ b/.github/workflows/build-server-image.yml @@ -12,9 +12,11 @@ jobs: contents: read steps: - uses: actions/checkout@v4 + with: + lfs: 'true' - name: Build image - run: docker build . -f ./server/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" + run: git lfs pull ; docker build . -f ./server/Dockerfile -t $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}" - name: Log in to registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin diff --git a/client/job_preparation/prepare_job.py b/client/job_preparation/prepare_job.py index d24513c..ce2643d 100644 --- a/client/job_preparation/prepare_job.py +++ b/client/job_preparation/prepare_job.py @@ -53,7 +53,7 @@ ssh_copy_file(ssh_client, sbatch_path, f"~/") # Copy config file to supercomputer - ssh_copy_file(ssh_client, options.config, f"~/.config/hpcs.conf") + ssh_copy_file(ssh_client, options.config, f"~/.config/hpcs-client.conf") # Create public encryption key for output data ident = x25519.Identity.generate() @@ -63,7 +63,7 @@ public_key_file.write(str(ident.to_public())) # Write private key to current directory - with open("./private_key", "w+") as private_key_file: + with open("/tmp/private_key", "w+") as private_key_file: private_key_file.write(str(ident)) # Copy public key to supercomputer diff --git a/server/app.py b/server/app.py index 82bd94d..e80eff8 100644 --- a/server/app.py +++ b/server/app.py @@ -30,6 +30,8 @@ if configuration['spire-server'].get('pre-command') : spire_interactions.pre_command = configuration['spire-server']['pre-command'] + if configuration['spire-server']['pre-command'] == "\"\"": + spire_interactions.pre_command = "" # Defining the trust domain (SPIRE Trust Domain) trust_domain = configuration['spire-server']['trust-domain'] @@ -49,7 +51,7 @@ async def handle_dummy_token_endpoint(): if hostname != None: # Create spiffeID based on the hostname - spiffeID = SpiffeId.parse(f"spiffe://{trust_domain}/h/{hostname}") + spiffeID = SpiffeId(f"spiffe://{trust_domain}/h/{hostname}") # Associate a token to the spiffeID result = token_generate(spiffeID) @@ -87,7 +89,7 @@ async def handle_client_registration(): write_client_policy(hvac_client, f"client_{client_id}") # Create spiffeID out of this client id - agent_spiffeID = SpiffeId.parse(f"spiffe://{trust_domain}/c/{client_id}") + agent_spiffeID = SpiffeId(f"spiffe://{trust_domain}/c/{client_id}") # Generate a token to register the agent (again, based on the client id) result = token_generate(agent_spiffeID) @@ -99,7 +101,7 @@ async def handle_client_registration(): # Create a spiffeID for the workloads on the client. # Register workloads that have to run on this agent - workload_spiffeID = SpiffeId.parse( + workload_spiffeID = SpiffeId( f"spiffe://{trust_domain}/c/{client_id}/workload" ) @@ -163,7 +165,7 @@ async def handle_workload_creation(): client_id = hashlib.sha256(client_id.encode()).hexdigest()[0:9] # Parse the spiffeID that will access the application - spiffeID = SpiffeId.parse( + spiffeID = SpiffeId( f"spiffe://{trust_domain}/c/{client_id}/s/{data['secret']}" ) @@ -179,7 +181,7 @@ async def handle_workload_creation(): groups_added = [] # Compute node's agent spiffeID - parentID = SpiffeId.parse(f"spiffe://{trust_domain}/h/{compute_node}") + parentID = SpiffeId(f"spiffe://{trust_domain}/h/{compute_node}") # For each user if data["users"] != None: diff --git a/server/lib/spire_interactions.py b/server/lib/spire_interactions.py index f369ef8..f7bdf82 100644 --- a/server/lib/spire_interactions.py +++ b/server/lib/spire_interactions.py @@ -25,9 +25,15 @@ def token_generate(spiffeID: SpiffeId) -> subprocess.CompletedProcess: subprocess.CompletedProcess: result of the cli command to create the token """ - command = f"{pre_command} {spire_server_bin} token generate -spiffeID {str(spiffeID)}".split( - " " - ) + if pre_command != "": + command = f"{pre_command} {spire_server_bin} token generate -spiffeID {str(spiffeID)}".split( + " " + ) + else: + command = f"{spire_server_bin} token generate -spiffeID {str(spiffeID)}".split( + " " + ) + return subprocess.run(command, capture_output=True) @@ -44,9 +50,14 @@ def entry_create( Returns: subprocess.CompletedProcess: result of the cli command to create the entry """ - command = f"{pre_command} {spire_server_bin} entry create -parentID {str(parentID)} -spiffeID {str(spiffeID)}".split( - " " - ) + if pre_command != "": + command = f"{pre_command} {spire_server_bin} entry create -parentID {str(parentID)} -spiffeID {str(spiffeID)}".split( + " " + ) + else: + command = f"{spire_server_bin} entry create -parentID {str(parentID)} -spiffeID {str(spiffeID)}".split( + " " + ) # Append selectors to final command for selector in selectors: @@ -64,9 +75,9 @@ def get_server_identity_JWT() -> JwtSvid: """ # Perform an api fetch using pyspiffe - SVID = jwt_workload_api.get_jwt_svid( + SVID = jwt_workload_api.fetch_svid( audiences=["TESTING"], - subject=SpiffeId().parse("spiffe://lumi-sd-dev/lumi-sd-server"), + subject=SpiffeId("spiffe://lumi-sd-dev/lumi-sd-server"), ) return SVID diff --git a/utils/agent-on-the-fly.conf b/utils/agent-on-the-fly.conf index 93f2d97..831408a 100644 --- a/utils/agent-on-the-fly.conf +++ b/utils/agent-on-the-fly.conf @@ -28,9 +28,6 @@ plugins { discover_workload_path = true } } - - WorkloadAttestor "systemd" { - } WorkloadAttestor "docker" { plugin_data {} diff --git a/utils/ship_a_key.py b/utils/ship_a_key.py index 1671ff8..0936f7a 100644 --- a/utils/ship_a_key.py +++ b/utils/ship_a_key.py @@ -166,7 +166,7 @@ def validate_options(options: argparse.ArgumentParser): # Check that user provided spiffeID is well formed try: - spiffeID = spiffe_id.SpiffeId().parse(f"{options.spiffeid}") + spiffeID = spiffe_id.SpiffeId(f"{options.spiffeid}") except SpiffeIdError: print(f"Error, spiffeID {options.spiffeid} is malformed") exit(1) @@ -268,7 +268,7 @@ def create_authorized_workloads( ) # Get the client's certificate to perform mTLS - SVID = jwt_workload_api.get_jwt_svid(audiences=["TESTING"], subject=spiffeID) + SVID = jwt_workload_api.fetch_svid(audiences=["TESTING"], subject=spiffeID) # Perform workloads authorization for the secret to be created users_spiffeID, client_id, secrets_path, user_role = create_authorized_workloads( diff --git a/utils/ssh_utils.py b/utils/ssh_utils.py index 90eb3d6..a5038a6 100644 --- a/utils/ssh_utils.py +++ b/utils/ssh_utils.py @@ -35,8 +35,8 @@ def ssh_connect(username: str) -> SSHClient: host, port, username=username, - pkey=pkey, - look_for_keys=False, + pkey=pkey, + look_for_keys=False, auth_timeout=30, timeout=30, ) From 1a64edffd64b04b830b3370f6ad2852de66d9096 Mon Sep 17 00:00:00 2001 From: telliere Date: Mon, 25 Mar 2024 11:05:28 +0200 Subject: [PATCH 16/18] #5 adding sha256 of python 3.9, qemu and spire-agent to identify workloads on cgroupsv2 machines --- server/app.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/server/app.py b/server/app.py index e80eff8..6a34863 100644 --- a/server/app.py +++ b/server/app.py @@ -128,11 +128,22 @@ async def handle_client_registration(): "client_id": client_id, "token": agent_token, } - + + # Spire-Agent binary result = entry_create( agent_spiffeID, workload_spiffeID, ["unix:sha256:5ebff0fdb3335ec0221c35dcc7d3a4433eb8a5073a15a6dcfdbbb95bb8dbfa8e"] ) + # Python 3.9 binary + result = entry_create( + agent_spiffeID, workload_spiffeID, ["unix:sha256:956a50083eb7a58240fea28ac52ff39e9c04c5c74468895239b24bdf4760bffe"] + ) + + # Qemu x86_64 (For docker mac) // Could add Rosetta binary + result = entry_create( + agent_spiffeID, workload_spiffeID, ["unix:sha256:3fc6c8fbd8fe429b67276854fbb5ae594118f7f0b10352a508477833b04ee9b7"] + ) + # Success return { "success": True, From 6b59c47cb98c2095d7d53fe87d90d9afe70b2eb2 Mon Sep 17 00:00:00 2001 From: telliere Date: Mon, 25 Mar 2024 11:19:43 +0200 Subject: [PATCH 17/18] #6 Fixing pyspiffe version --- client/container_preparation/Dockerfile | 2 +- client/data_preparation/Dockerfile | 2 +- server/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/client/container_preparation/Dockerfile b/client/container_preparation/Dockerfile index ab67eea..bdcb9e2 100644 --- a/client/container_preparation/Dockerfile +++ b/client/container_preparation/Dockerfile @@ -18,7 +18,7 @@ RUN tar xvf spire-1.9.1-linux-amd64-musl.tar.gz ; mv spire-1.9.1 /opt ; mv /opt/ RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent # Install pyspiffe package -RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git +RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 # Create code directory, output directory RUN mkdir /container_preparation /output ; chmod -R 777 /output diff --git a/client/data_preparation/Dockerfile b/client/data_preparation/Dockerfile index fbbdea1..9c34410 100644 --- a/client/data_preparation/Dockerfile +++ b/client/data_preparation/Dockerfile @@ -20,7 +20,7 @@ RUN tar xvf spire-1.9.1-linux-amd64-musl.tar.gz ; mv spire-1.9.1 /opt ; mv /opt/ RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent # Install pyspiffe package -RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git +RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 # Create code directory, output directory RUN mkdir /data_preparation /output diff --git a/server/Dockerfile b/server/Dockerfile index fd1aa1b..a2bc5e3 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -13,7 +13,7 @@ RUN ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent RUN ln -s /opt/spire/bin/spire-server /usr/bin/spire-server # Install pyspiffe package -RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git +RUN pip install git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 # Copy server RUN mkdir /server From e859010cf1b37442d7a1e60b65f191d0dd42f6db Mon Sep 17 00:00:00 2001 From: telliere Date: Tue, 26 Mar 2024 11:26:48 +0200 Subject: [PATCH 18/18] #11 removing hardcoded age and gocryptfs paths --- client/job_preparation/utils/sbatch.template | 40 +++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/client/job_preparation/utils/sbatch.template b/client/job_preparation/utils/sbatch.template index 586f7a1..1dcf1ab 100644 --- a/client/job_preparation/utils/sbatch.template +++ b/client/job_preparation/utils/sbatch.template @@ -53,15 +53,37 @@ export vault="VAULT_ADDRESS" WORKING_DIRECTORY=WORKDIR mkdir -p ${WORKING_DIRECTORY} -# Bring LUMI-SD logic +# Clone HPCS repository if not already available +echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Job]${NC} Cloning HPCS repository if it doesn't already exist" + +if ! [ -d ~/HPCS ]; then + git clone https://github.com/CSCfi/HPCS.git ~/HPCS +fi + +echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Job]${NC} Getting age and gocryptfs as static binaries" +cd ~/HPCS || exit 1 +PATH=$PATH:$HOME/HPCS/bin + +if ! which age ; then + mkdir -p bin + curl -O -L https://github.com/FiloSottile/age/releases/download/v1.1.1/age-v1.1.1-linux-amd64.tar.gz || exit 1 + tar xvf age-v1.1.1-linux-amd64.tar.gz + mv age/age ./bin/ + rm -r age* +fi + +if ! which gocryptfs ; then + mkdir -p bin + curl -O -L https://github.com/rfjakob/gocryptfs/releases/download/v2.4.0/gocryptfs_v2.4.0_linux-static_amd64.tar.gz || exit 1 + tar xvf gocryptfs_v2.4.0_linux-static_amd64.tar.gz + mv gocryptfs ./bin/ + rm -r gocryptfs* +fi -## Clone repo in common directory if it doesn't already exists -### For the moment : assume it's already cloned in ~/LUMI-secure-processing echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Job]${NC} Running agent registration" # Spawn spire-agent -cd ~/HPCS || exit 1 python3 ./utils/spawn_agent.py --config ~/.config/hpcs-client.conf -cn > $WORKING_DIRECTORY/agent.log 2> $WORKING_DIRECTORY/agent.log & spire_agent_pid=$! @@ -96,7 +118,7 @@ echo "$data_key" > /tmp/container_key echo "Decrypting container image ..." # Decrypt the container image -~/HPCS/client/container_preparation/input_logic/age --decrypt -i /tmp/container_key -o $WORKING_DIRECTORY/app.sif APPLICATION_PATH || exit 1 +age --decrypt -i /tmp/container_key -o $WORKING_DIRECTORY/app.sif APPLICATION_PATH || exit 1 echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Job]${NC} Creating encrypted volumes" @@ -110,8 +132,8 @@ password_in=$(tr -dc 'A-Za-z0-9!?%=' < /dev/urandom | head -c 25) password_out=$(tr -dc 'A-Za-z0-9!?%=' < /dev/urandom | head -c 25) # Setup encrypted volumes -/users/etellier/gocryptfs -q -init --extpass echo --extpass "$password_out" output -/users/etellier/gocryptfs -q -init --extpass echo --extpass "$password_in" input +gocryptfs -q -init --extpass echo --extpass "$password_out" output +gocryptfs -q -init --extpass echo --extpass "$password_in" input # Mounting cipher version of the output encrypted filesystem to write final encrypted results # Mounting the encrypted input data tgz to the container to decrypt it in the input encrypted filesystem @@ -119,8 +141,8 @@ BASIC_FLAGS="--bind ./output:/tmp/output --bind DATA_PATH:/sd-container/input/da # Encrypted binds inside of the container BIND_ENCRYPTED_VOLUME_FLAG="--fusemount host:" -BIND_ENCRYPTED_VOLUME_FLAG_MOUNT_INPUT="/users/etellier/gocryptfs -q --extpass echo --extpass ${password_in} ${WORKING_DIRECTORY}/input /sd-container/encrypted" -BIND_ENCRYPTED_VOLUME_FLAG_MOUNT_OUTPUT="/users/etellier/gocryptfs -q --extpass echo --extpass ${password_out} ${WORKING_DIRECTORY}/output /sd-container/output" +BIND_ENCRYPTED_VOLUME_FLAG_MOUNT_INPUT="$HOME/HPCS/bin/gocryptfs -q --extpass echo --extpass ${password_in} ${WORKING_DIRECTORY}/input /sd-container/encrypted" +BIND_ENCRYPTED_VOLUME_FLAG_MOUNT_OUTPUT="$HOME/HPCS/bin/gocryptfs -q --extpass echo --extpass ${password_out} ${WORKING_DIRECTORY}/output /sd-container/output" BIND_INPUT="INPUT_SCRIPTS_DIR" BIND_OUTPUT="OUTPUT_SCRIPTS_DIR"