diff --git a/.git-crypt/.gitattributes b/.git-crypt/.gitattributes new file mode 100644 index 0000000..665b10e --- /dev/null +++ b/.git-crypt/.gitattributes @@ -0,0 +1,4 @@ +# Do not edit this file. To specify the files to encrypt, create your own +# .gitattributes file in the directory where your files are. +* !filter !diff +*.gpg binary diff --git a/.git-crypt/keys/default/0/289BD22DE72C1F968395D27344D6C3045E984B2D.gpg b/.git-crypt/keys/default/0/289BD22DE72C1F968395D27344D6C3045E984B2D.gpg new file mode 100644 index 0000000..0ce9c28 Binary files /dev/null and b/.git-crypt/keys/default/0/289BD22DE72C1F968395D27344D6C3045E984B2D.gpg differ diff --git a/.gitattributes b/.gitattributes index 00fef5c..933bd2c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,5 @@ client/container_preparation/input_logic/age filter=lfs diff=lfs merge=lfs -text client/container_preparation/input_logic/curl filter=lfs diff=lfs merge=lfs -text client/container_preparation/input_logic/jq filter=lfs diff=lfs merge=lfs -text client/container_preparation/input_logic/tar filter=lfs diff=lfs merge=lfs -text +# encrypted terraform secrets +terraform/secrets/** filter=git-crypt diff=git-crypt diff --git a/.github/workflows/build-container-prep-image.yml b/.github/workflows/build-container-prep-image.yml index b6b6642..1c393b0 100644 --- a/.github/workflows/build-container-prep-image.yml +++ b/.github/workflows/build-container-prep-image.yml @@ -1,5 +1,9 @@ name: Build and publish HPCS container preparation image -on: [push] +on: + push: + branches: + - main + pull_request: env: IMAGE_NAME: hpcs/container-prep diff --git a/.github/workflows/build-data-prep-image.yml b/.github/workflows/build-data-prep-image.yml index 307ce16..d1a2fb3 100644 --- a/.github/workflows/build-data-prep-image.yml +++ b/.github/workflows/build-data-prep-image.yml @@ -1,5 +1,9 @@ name: Build and publish HPCS data preparation image -on: [push] +on: + push: + branches: + - main + pull_request: env: IMAGE_NAME: 
hpcs/data-prep diff --git a/.github/workflows/build-job-prep-image.yml b/.github/workflows/build-job-prep-image.yml index 1e62799..d2c88a8 100644 --- a/.github/workflows/build-job-prep-image.yml +++ b/.github/workflows/build-job-prep-image.yml @@ -1,5 +1,9 @@ name: Build and publish HPCS job preparation image -on: [push] +on: + push: + branches: + - main + pull_request: env: IMAGE_NAME: hpcs/job-prep diff --git a/.github/workflows/build-server-image.yml b/.github/workflows/build-server-image.yml index 97657d9..4734bb5 100644 --- a/.github/workflows/build-server-image.yml +++ b/.github/workflows/build-server-image.yml @@ -1,5 +1,9 @@ name: Build and publish HPCS server image -on: [push] +on: + push: + branches: + - main + pull_request: env: IMAGE_NAME: hpcs/server diff --git a/.github/workflows/general-lint.yaml b/.github/workflows/general-lint.yaml new file mode 100644 index 0000000..2bdb13e --- /dev/null +++ b/.github/workflows/general-lint.yaml @@ -0,0 +1,33 @@ +name: General lint +on: + push: + branches: + - main + pull_request: + +jobs: + general_lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Checking for newlines at the end of files + run: if (for file in $(find . -type f -not -path './.git/*' -not -path './.git-crypt/*' -not -path './terraform/secrets/*') ; do [ "$(tail -c 1 < "${file}")" == "" ] || echo "${file} has no newline at the end..." ; done) | grep . ; then exit 1 ; fi + - name: Checking for trailing whitespaces + run: if find . -type f -not -path './.git/*' -exec egrep -l " +$" {} \; | grep . ; then exit 1 ; fi + + - name: Running shellcheck on *.sh files + run: | + find . 
-name .git -type d -prune -o -type f -name \*.sh -print0 | + xargs -0 -r -n1 shellcheck + helm_lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: run helm lint on hpcs-stack + run: docker run --rm -v $(pwd)/k8s:/apps alpine/helm:latest lint hpcs-stack + terraform_lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: run terraform fmt + run: docker run --rm -v $(pwd):/data docker.io/hashicorp/terraform fmt -check /data/terraform diff --git a/.gitignore b/.gitignore index d4e95c2..919379c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,19 @@ # Undo-tree save-files *.~undo-tree + +# openrc configs +*-openrc.sh + +# terraform +.terraform* +## local variables +terraform/*.tfvars +## user specific secrets +terraform/secrets/public_keys +terraform/secrets/tunnel_keys + +# Ignore certificates and keys +*.crt +*.csr +*.key diff --git a/README.md b/README.md index 725ec35..06b98cc 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ To run one of the containers : docker compose run --rm [data/container/job]-prep ``` -If you want to run the whole process by yourself : +If you want to run the whole process by yourself : ```bash docker compose run --rm data-prep @@ -203,9 +203,10 @@ An example demonstration is available [here](https://asciinema.org/a/PWDzxlaVQmf ### Server -HPCS Server is an API, interfacing HPCS client with Vault and Spire. This section needs basic knowledge of [SPIFFE/SPIRE](https://spiffe.io/) and [HashiCorp Vault](https://www.vaultproject.io/). +HPCS Server is an API, interfacing HPCS client with Vault and Spire. This section needs basic knowledge of [SPIFFE/SPIRE](https://spiffe.io/) and [HashiCorp Vault](https://www.vaultproject.io/). For k8s, we only consider `kubectl` and `ansible` as available tools and that `kubectl` can create pods. Vault roles, spire identities are created automatically. 
+For development and demonstrative purposes we provide a `terraform` definition of a VM with an operational kubernetes cluster, for documentation and deployment instructions go [there](terraform). For docker-compose, we consider the Vault and the Spire Server as setup and the Spire-OIDC provider implemented to allow login to the vault using SVID identity. We also consider that proper roles are created in Vault to authorize HPCS Server to write roles and policies to the Vault, using a server SPIFFEID. @@ -257,77 +258,29 @@ Before proceeding to HPCS' deployment, an original setup is required including : - A ready-to-run k8s cluster - `kubectl` and `helm` available and able to run kubernetes configurations (`.yaml`) - `rbac`, `storage` and `dns` and `helm` kubernetes capabilities, f.e : `microk8s enable rbac storage dns helm` with microk8s. - + Please note down the name of your k8s cluster in order to run later deployments. ##### Configuration -Several configurations are to be reviewed before proceeding. -- Nginx SSL Certificate path : Please review in `/k8s/spire-server-nginx-configmap.yaml` (section `ssl_certificate`) and `/k8s/spire-server-statefulset.yaml` (section `volumeMounts` of container `hpcs-nginx` and section `volumes` of the pod configuration). If you plan to run the deployment using ansible, please review `/k8s/deploy-all.yaml`, section `Copy oidc cert to vault's pod` and `Create spire-oidc {key, csr, cert}` for the host path to the certificate. Create the directory configured before running deployment. - -- Cluster name : Please review in `/k8s/hpcs-server-configmap.yaml`, section "`agent.conf`", then "`k8s_psat`" and `/k8s/spire-server-configmap.yaml`, section "`server.conf`", then "`k8s_psat`", replace "`docker-desktop`" with your k8s cluster name. 
- -- For further information about spire agent/ server configurations under `/k8s/hpcs-server-configmap.yaml` and `/k8s/spire-server-configmap.yaml`, please refer to spire-server [configuration reference](https://spiffe.io/docs/latest/deploying/spire_server) and spire-agent [configuration reference](https://spiffe.io/docs/latest/deploying/spire_agent/). - - ##### Bash This part of the documentation walks you through the different steps necessary in order to run a manual deployment of HPCS' serverside (including Vault, Spire-Server and HPCS Server). -__Starting with the "`spire-server`" pods :__ - -Generate your nginx certificate : -```bash -openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /path/to/your/privatekey.key -out /path/to/your/certificate.crt -addext "subjectAltName = DNS:spire-oidc" -``` - -Create HPCS namespace : -```bash -cd k8s -kubectl apply -f hpcs-namespace.yaml -``` - -Create Spire service account and cluster role : -```bash -kubectl apply -f spire-server-account.yaml -f spire-server-cluster-role.yaml -``` - -Create configmaps for spire-server, spire-oidc and nginx proxy : +Generate your certificate : ```bash -kubectl apply -f spire-oidc-configmap.yaml -f spire-server-configmap.yaml -f spire-server-nginx-configmap.yaml +openssl req -x509 -nodes -days 365 -newkey rsa:4096 -keyout hpcs-stack/charts/spire/files/spire-oidc.key -out hpcs-stack/charts/spire/files/spire-oidc.crt -addext "subjectAltName = DNS:spire-oidc" ``` -Create spire-server statefulset, managing spire-server-x pods : -```bash -kubectl apply -f spire-server-statefulset.yaml -``` - -Expose spire-oidc proxy and spire-server's api over the cluster : -```bash -kubectl apply -f spire-server-service.yaml -f spire-oidc-service.yaml -``` - -At this point, you should be able to see at least one `spire-server-x` pod, f.e : - +Add hashicorp repo and run installation : ```bash -kubectl get -n hpcs pod/spire-server-0 -NAME READY STATUS RESTARTS AGE -spire-server-0 3/3 Running 0 30s +helm 
repo add hashicorp https://helm.releases.hashicorp.com +helm install vault hashicorp/vault --version 0.27.0 --namespace=hpcs ``` -And the port on which the spire-server API is exposed (here 31140) : +Install `hpcs-stack` chart : ```bash -kubectl get -n hpcs service/spire-server -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -spire-server LoadBalancer 10.99.214.248 localhost 8081:31140/TCP 30s -``` - -__Then install Hashicorp Vault via it's official helm chart (here with microk8s):__ - -Add hashicorp repo and run installation : -```bash -microk8s helm3 repo add hashicorp https://helm.releases.hashicorp.com -helm install vault hashicorp/vault --version 0.27.0 --namespace=hpcs +helm upgrade --install --namespace hpcs --create-namespace hpcs-stack $(git rev-parse --show-toplevel)/k8s/hpcs-stack ``` Initialize the Vault : @@ -341,7 +294,14 @@ Unseal vault : kubectl exec -it vault-0 -n hpcs -- vault operator unseal [seal token] ``` -Connect to the vault to enable jwt auth and kvv2 secrets, register oidc as a source : +Now double check that `spire-server` installed as part of `hpcs-stack` is actually ready already : +```bash +-> kubectl get --namespace hpcs pods/spire-server-0 +NAME READY STATUS RESTARTS AGE +spire-server-0 3/3 Running 0 73m +``` + +If it's ready you can connect to the vault to enable jwt auth and kvv2 secrets, register oidc as a source : ```bash kubectl exec -it vault-0 -n hpcs -- sh export VAULT_TOKEN="[root token]" @@ -360,17 +320,6 @@ vault write auth/jwt/config oidc_discovery_url=https://spire-oidc oidc_discovery " ``` -Expose Vault's API to the node : -```bash -kubectl expose service vault --name="vault-external" --type="NodePort" --target-port 8200 -n hpcs -``` - -At this point, Vault is running and it's API is exposed, to check on which port, run : -```bash -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -vault-external NodePort 10.111.198.147 localhost 8200:31819/TCP,8201:31587/TCP 2s -``` - __Next step is to create a spire identity and it's vault 
role in order to be able to identify HPCS-Server against Vault__ Get your kubernetes node uid (repeat this and the following spire identity creation for every nodes): @@ -408,46 +357,19 @@ path "sys/policies/acl/*" { en_policies=hpcs-server ``` -__You can now deploy HPCS server__ - -Create hpcs-server and hpcs-spire service accounts : -```bash -kubectl apply -f hpcs-server-account.yaml -f hpcs-spire-account.yaml -``` - -Create hpcs server configmap : -```bash -kubectl apply -f hpcs-server-configmap.yaml -``` +You can now wait for HPCS server to finish setting up : -Create hpcs-server statefulset (and underlying pods) : ```bash -kubectl apply -f hpcs-server-statefulset.yaml -``` - -Expose hpcs-server api over the cluster : -```bash -kubectl apply -f hpcs-server-service.yaml -``` - -Expose hpcs-server service over the node : -```bash -kubectl expose service hpcs-server --name="hpcs-server-external" --type="NodePort" --target-port 10080 -n hpcs -``` - -Check exposed port : -```bash -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -hpcs-server-external NodePort 10.111.198.151 localhost 10080:31827/TCP 2s +-> kubectl get --namespace hpcs pods/hpcs-server-0 +NAME READY STATUS RESTARTS AGE +hpcs-server-0 1/1 Running 3 (75m ago) 75m ``` That's it, you can now use HPCS server as you please. ##### Ansible -:warning: This method is currently still under development. You could run into non-documented issues. - -The previously explained steps can be automatically run using an ansible playbook available under `/k8s/deploy-all.yaml` +The previously explained steps can be automatically run using an ansible [playbook](k8s/deploy-all.yaml). All the pre-requisites listed before are necessary to run this playbook. 
If you are running kubernetes using `microk8s`, you will need to create aliases or fake commands for `helm`, for example using a script : ```bash @@ -585,7 +507,7 @@ Using TPM, for example, it is very easy to run automatic node attestation, based ### Encrypted container -The goal of this project was to leverage Singularity/Apptainer's [encrypted containers](https://docs.sylabs.io/guides/3.4/user-guide/encryption.html). This feature enables the end user to protect the runtime of the container, allowing it to confine unencrypted data within the encrypted container, adding an extra layer of security. +The goal of this project was to leverage Singularity/Apptainer's [encrypted containers](https://docs.sylabs.io/guides/3.4/user-guide/encryption.html). This feature enables the end user to protect the runtime of the container, allowing it to confine unencrypted data within the encrypted container, adding an extra layer of security. Unfortunately for LUMI, this feature relies on different technologies, depending the permission level at which the container is encrypted, this behaviour is documented in the following table for usage on LUMI : @@ -599,7 +521,7 @@ Unfortunately for LUMI, this feature relies on different technologies, depending Two main reasons for the issues with the encrypted containers : - Cannot run as root on a node (no workaround, as this is a feature of HPC environments). - User namespaces are disabled on LUMI (for secure reason, [this stackexchange](https://security.stackexchange.com/questions/267628/user-namespaces-do-they-increase-security-or-introduce-new-attack-surface) has some explanations). - + To run encrypted containers as described above, we would need to enable user namespaces on the platform. This would require a thorough risk/benefit assessment, since it introduces new attack surfaces and therefore will not be introduced lightly, at least not on on LUMI in the near future. 
We mitigate the unavailability of encrypted containers in two steps : @@ -616,7 +538,7 @@ When a client wants to encrypt its data or container and to give access to it to - Client runs containers using cgroupsv2 - Client runs on Linux - `spire-agent api fetch` can be attested using spire-agent binary's `sha256sum` - - `python3 ./utils/spawn_agent.py` can't be attested since the `sha256sum` recognised by the workload API is `python3`'s. A mitigation to that would be to compile the code, if possible. This would potentially provide a unique binary that would then be able to be attested using `sha256sum` + - `python3 ./utils/spawn_agent.py` can't be attested since the `sha256sum` recognised by the workload API is `python3`'s. A mitigation to that would be to compile the code, if possible. This would potentially provide a unique binary that would then be able to be attested using `sha256sum` - Client runs on MacOS - No attestation is doable at the moment since MacOS doesn't support docker and runs container inside of a Linux VM - Using cgroupsv2 diff --git a/client/container_preparation/Dockerfile b/client/container_preparation/Dockerfile index 5e0a955..9d2478d 100644 --- a/client/container_preparation/Dockerfile +++ b/client/container_preparation/Dockerfile @@ -2,11 +2,6 @@ ARG BUILDPLATFORM=linux/amd64 FROM --platform=$BUILDPLATFORM python:3.9-alpine - - -# Add rust binaries to PATH -ENV PATH="$PATH:/root/.cargo/bin" - # Create code directory, output directory RUN mkdir /container_preparation /output @@ -19,22 +14,29 @@ COPY ./utils /container_preparation/utils # Set workdir WORKDIR /container_preparation -# Install necessary packages, spire-agent and rust +# Install necessary packages RUN apk add --no-cache \ - git=2.43.0-r0 \ - curl=8.5.0-r0 \ + git=2.45.2-r0 \ + curl=8.11.0-r2 \ jq=1.7.1-r0 \ build-base=0.5-r3 \ - libffi-dev=3.4.4-r3 && \ -curl -LsSf -o spire-1.9.0-linux-amd64-musl.tar.gz 
https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz && \ -tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire && \ -ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent && \ -ln -s /opt/spire/bin/spire-server /usr/bin/spire-server && \ -rm -rf spire-1.9.0-linux-amd64-musl.tar.gz && \ -curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup.sh -y ; export PATH="$PATH":/root/.cargo/bin && \ -pip install --no-cache-dir -r ./requirements.txt && \ -pip install --no-cache-dir git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 && \ -rm -r /root/.cargo /root/.rustup + libffi-dev=3.4.6-r0 +# Install spire-agent +RUN curl -LsSf -o spire-1.9.0-linux-amd64-musl.tar.gz https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz && \ + tar xvf spire-1.9.0-linux-amd64-musl.tar.gz && \ + mv spire-1.9.0 /opt && \ + mv /opt/spire-1.9.0 /opt/spire && \ + ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent && \ + ln -s /opt/spire/bin/spire-server /usr/bin/spire-server && \ + rm -rf spire-1.9.0-linux-amd64-musl.tar.gz +# Install python things (and rust temporarily) +RUN curl https://sh.rustup.rs -sSf -o rustup.sh && \ + chmod +x rustup.sh && \ + ./rustup.sh -y && \ + export PATH="$PATH:/root/.cargo/bin" && \ + pip install --no-cache-dir -r ./requirements.txt && \ + pip install --no-cache-dir git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 && \ + rm -r /root/.cargo /root/.rustup # Set entrypoint ENTRYPOINT [ "./entrypoint.sh" ] diff --git a/client/container_preparation/entrypoint.sh b/client/container_preparation/entrypoint.sh index 3173f76..74dfa2b 100755 --- a/client/container_preparation/entrypoint.sh +++ b/client/container_preparation/entrypoint.sh @@ -8,7 +8,7 @@ docker_path="/var/run/docker.sock" # Argument parser, arguments for both container 
preparation and key shipping should be handled here. parse_args() { - while [[ "$#" -gt 0 ]]; do + while [ "${#}" -gt 0 ]; do case "$1" in --config) config="$2" @@ -69,7 +69,7 @@ parse_args() { done # Check for required arguments - if [ -z "$config" ] || [ -z "$base_oci_image" ] || [ -z "$sif_path" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || ([ -z "$users" ] && [ -z "$groups" ]) || [ -z "$compute_nodes" ]; then + if [ -z "$config" ] || [ -z "$base_oci_image" ] || [ -z "$sif_path" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || { [ -z "$users" ] && [ -z "$groups" ]; } || [ -z "$compute_nodes" ]; then echo echo "Please provides options for both of these programs : " python3 ./prepare_container.py --help python3 ./utils/ship_a_key.py --help @@ -79,7 +79,7 @@ parse_args() { # Cleanup spire-agent generated files end_entrypoint() { - if ! [ -n "$encrypted" ]; then + if [ -z "$encrypted" ]; then echo "No encryption, nothing to clean" else echo "Cleaning everything before leaving ..." @@ -100,21 +100,21 @@ NC='\033[0m' # No Color # Parse arguments from cli parse_args "$@" -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Entering entrypoint" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Entering entrypoint" # ## [RUN] Perform node attestation (spawn agent, register it's and it's workload's spiffeID) # if [ -n "$encrypted" ]; then - echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is on. Registering and running SPIRE Agent" + printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is on. Registering and running SPIRE Agent" - python3 ./utils/spawn_agent.py --config $config >/dev/null 2>/dev/null & + python3 ./utils/spawn_agent.py --config "$config" >/dev/null 2>/dev/null & spire_agent_pid=$! 
fi -ps $spire_agent_pid >/dev/null || ( +ps "$spire_agent_pid" >/dev/null || ( echo "spire agent died, aborting" end_entrypoint "$spire_agent_pid" 1 ) @@ -123,7 +123,7 @@ ps $spire_agent_pid >/dev/null || ( ## [END] Perform node attestation # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Run container preparation" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Run container preparation" # ## [RUN] Run container preparation (Preparation of new image, build of new image, build of Apptainer/Singularity image) @@ -139,7 +139,7 @@ fi ## [END] Run container preparation # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Container preparation ended" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Container preparation ended" # ## [RUN] Ship private key to the vault (Creation of workload identity to give access to the key, writing key to the vault) @@ -150,29 +150,29 @@ if [ -n "$encrypted" ]; then fi if [ -z "$encrypted" ]; then - echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is off, nothing to do" + printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is off, nothing to do" else - echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is on, writing key to the vault, using spiffeID $spiffeID" + printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Encryption mode is on, writing key to the vault, using spiffeID $spiffeID" if [ -z "$users" ]; then # If the user provided only groups - python3 ./utils/ship_a_key.py --config $config --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config "$config" --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest 
"$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 elif [ -z "$groups" ]; then # If the user provided only users - python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config "$config" --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 else # If the user provided both - python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config "$config" --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 fi - echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Key written to the vault" + printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Key written to the vault" fi # ## [END] Ship private key to the vault # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Leaving entrypoint" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Container preparation]${NC} Leaving entrypoint" end_entrypoint "$spire_agent_pid" 0 diff --git a/client/container_preparation/input_logic/run.sh b/client/container_preparation/input_logic/run.sh index 86a1a34..579c073 100755 --- a/client/container_preparation/input_logic/run.sh +++ b/client/container_preparation/input_logic/run.sh @@ -7,10 +7,12 @@ PATH="$PATH:/sd-container/tools/input_logic/" echo "[SD-Container][Input-Logic] : Getting data decryption key from vault" # Get token via vault login. 
The data_login environment variable need to be exported from calling script -data_token=$(curl -s --request POST --data "$data_login" $vault/v1/auth/jwt/login | jq '.auth.client_token' -r) || exit 1 +# shellcheck disable=SC2154 # data_login and vault are actually environment variables someone at some point decided to use lower case letters for <- TODO: fix this +data_token=$(curl -s --request POST --data "$data_login" "$vault/v1/auth/jwt/login" | jq '.auth.client_token' -r) || exit 1 # Use the token to access the key. The data_path environment variable needs to be exported from calling script -data_key=$(curl -s -H "X-Vault-Token: $data_token" $vault/v1/kv/data/${data_path} | jq '.data.data.key' -r) || exit 1 +# shellcheck disable=SC2154 # data_path and vault are actually environment variables someone at some point decided to use lower case letters for <- TODO: fix this +data_key=$(curl -s -H "X-Vault-Token: $data_token" "$vault/v1/kv/data/${data_path}" | jq '.data.data.key' -r) || exit 1 # Write the key in an encrypted volume echo "$data_key" >/sd-container/encrypted/decryption_key @@ -26,7 +28,7 @@ rm /sd-container/encrypted/decryption_key echo "[SD-Container][Input-Logic] : Data decrypted" # Untar the not anymore encrypted archive -cd /sd-container/encrypted +cd /sd-container/encrypted || exit 1 tar xvf /sd-container/encrypted/decrypted_data.tgz || exit 1 echo "[SD-Container][Input-Logic] : Data untared" diff --git a/client/container_preparation/lib/image_build.py b/client/container_preparation/lib/image_build.py index 3fc4134..04da938 100644 --- a/client/container_preparation/lib/image_build.py +++ b/client/container_preparation/lib/image_build.py @@ -25,8 +25,9 @@ def generate_prepared_dockerfile( # Read the current image to extract the entrypoint # Depending on image configuration, Entrypoint can be found at different place - base_entrypoint = base_image.attrs["ContainerConfig"]["Entrypoint"] - if base_entrypoint == None: + if "ContainerConfig" in 
base_image.attrs: + base_entrypoint = base_image.attrs["ContainerConfig"]["Entrypoint"] + else: base_entrypoint = base_image.attrs["Config"]["Entrypoint"] # Starting to write the prepared container Dockerfile diff --git a/client/data_preparation/Dockerfile b/client/data_preparation/Dockerfile index 7836c1b..ae6a556 100644 --- a/client/data_preparation/Dockerfile +++ b/client/data_preparation/Dockerfile @@ -14,22 +14,29 @@ COPY ./utils /data_preparation/utils # Set workdir WORKDIR /data_preparation -# Install necessary packages, spire-agent and rust +# Install necessary packages RUN apk add --no-cache \ - git=2.43.0-r0 \ - curl=8.5.0-r0 \ + git=2.45.2-r0 \ + curl=8.11.0-r2 \ jq=1.7.1-r0 \ build-base=0.5-r3 \ - libffi-dev=3.4.4-r3 && \ -curl -LsSf -o spire-1.9.0-linux-amd64-musl.tar.gz https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz && \ -tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire && \ -ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent && \ -ln -s /opt/spire/bin/spire-server /usr/bin/spire-server && \ -rm -rf spire-1.9.0-linux-amd64-musl.tar.gz && \ -curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup.sh -y ; export PATH="$PATH":/root/.cargo/bin && \ -pip install --no-cache-dir -r ./requirements.txt && \ -pip install --no-cache-dir git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 && \ -rm -r /root/.cargo /root/.rustup + libffi-dev=3.4.6-r0 +# Install spire-agent +RUN curl -LsSf -o spire-1.9.0-linux-amd64-musl.tar.gz https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz && \ + tar xvf spire-1.9.0-linux-amd64-musl.tar.gz && \ + mv spire-1.9.0 /opt && \ + mv /opt/spire-1.9.0 /opt/spire && \ + ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent && \ + ln -s /opt/spire/bin/spire-server /usr/bin/spire-server && \ + rm -rf spire-1.9.0-linux-amd64-musl.tar.gz +# 
Install python things (and rust temporarily) +RUN curl https://sh.rustup.rs -sSf -o rustup.sh && \ + chmod +x rustup.sh && \ + ./rustup.sh -y && \ + export PATH="$PATH:/root/.cargo/bin" && \ + pip install --no-cache-dir -r ./requirements.txt && \ + pip install --no-cache-dir git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 && \ + rm -r /root/.cargo /root/.rustup # Set entrypoint ENTRYPOINT [ "./entrypoint.sh" ] diff --git a/client/data_preparation/entrypoint.sh b/client/data_preparation/entrypoint.sh index 6118bd2..79c3371 100755 --- a/client/data_preparation/entrypoint.sh +++ b/client/data_preparation/entrypoint.sh @@ -5,7 +5,7 @@ # Argument parser, arguments for both Data preparation and key shipping should be handled here. parse_args() { - while [[ "$#" -gt 0 ]]; do + while [ "$#" -gt 0 ]; do case "$1" in --config) config="$2" @@ -58,7 +58,7 @@ parse_args() { done # Check for required arguments - if [ -z "$config" ] || [ -z "$input_data" ] || [ -z "$output_data" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || [ -z "$username" ] || ([ -z "$users" ] && [ -z "$groups" ]) || [ -z "$compute_nodes" ]; then + if [ -z "$config" ] || [ -z "$input_data" ] || [ -z "$output_data" ] || [ -z "$data_path" ] || [ -z "$data_path_at_rest" ] || [ -z "$username" ] || { [ -z "$users" ] && [ -z "$groups" ]; } || [ -z "$compute_nodes" ]; then echo echo "Please provides options for both of these programs : " python3 ./prepare_data.py --help python3 ./utils/ship_a_key.py --help @@ -86,21 +86,21 @@ NC='\033[0m' # No Color # Parse arguments from cli parse_args "$@" -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Entering entrypoint" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Entering entrypoint" # ## [RUN] Perform node attestation (spawn agent, register it's and it's workload's spiffeID) # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Registering and running SPIRE Agent" 
+printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Registering and running SPIRE Agent" -python3 ./utils/spawn_agent.py --config $config >/dev/null 2>/dev/null & +python3 ./utils/spawn_agent.py --config "$config" >/dev/null 2>/dev/null & spire_agent_pid=$! until [ -e /tmp/agent.sock ]; do - echo -e "${RED}[LUMI-SD][Data preparation] Spire workload api socket doesn't exist, waiting 10 seconds ${NC}" + printf "%b\n" "${RED}[LUMI-SD][Data preparation] Spire workload api socket doesn't exist, waiting 10 seconds ${NC}" sleep 10 - if ! ps | grep $spire_agent_pid >/dev/null; then + if ! kill -0 "$spire_agent_pid" 2>/dev/null; then echo "spire agent died, aborting" end_entrypoint "$spire_agent_pid" 1 fi @@ -110,7 +110,7 @@ done ## [END] Perform node attestation # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Run Data preparation" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Run Data preparation" # ## [RUN] Run Data preparation (Encryption of input data) @@ -122,7 +122,7 @@ python3 ./prepare_data.py -i "$input_data" -o "$output_data" || end_entrypoint " ## [END] Run Data preparation # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Data preparation ended" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Data preparation ended" # ## [RUN] Ship private key to the vault (Creation of workload identity to give access to the key, writing key to the vault) @@ -130,26 +130,26 @@ echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Data preparation spiffeID=$(spire-agent api fetch --output json -socketPath /tmp/agent.sock | jq '.svids[0].spiffe_id' -r) -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Writing key to the vault, using spiffeID $spiffeID" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Writing key to the vault, using spiffeID $spiffeID" # Handle different cases of user provided compute nodes / user / groups if [ -z "$users" 
]; then # If the user provided only groups - python3 ./utils/ship_a_key.py --config $config --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config "$config" --username "$username" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 elif [ -z "$groups" ]; then # If the user provided only users - python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config "$config" --username "$username" -u "$users" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 else # If the user provided both - python3 ./utils/ship_a_key.py --config $config --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 + python3 ./utils/ship_a_key.py --config "$config" --username "$username" -u "$users" -g "$groups" -c "$compute_nodes" --data-path "$data_path" --data-path-at-rest "$data_path_at_rest" -i "$spiffeID" || end_entrypoint "$spire_agent_pid" 1 fi -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Key written to the vault" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Key written to the vault" # ## [END] Ship private key to the vault # -echo -e "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Leaving entrypoint" +printf "%b\n" "${YELLOW}[LUMI-SD]${NC}${BLUE}[Data preparation]${NC} Leaving entrypoint" end_entrypoint "$spire_agent_pid" 0 diff --git 
a/client/job_preparation/Dockerfile b/client/job_preparation/Dockerfile index 5df7d90..9bb8f8e 100644 --- a/client/job_preparation/Dockerfile +++ b/client/job_preparation/Dockerfile @@ -11,14 +11,20 @@ WORKDIR /job_preparation # Copy utils for SPIFFEID creation ... COPY ./utils /job_preparation/utils -# Install necessary packages and rust +# Install necessary packages RUN apk add --no-cache \ - curl=8.5.0-r0 \ + git=2.45.2-r0 \ + curl=8.11.0-r2 \ + jq=1.7.1-r0 \ build-base=0.5-r3 \ - libffi-dev=3.4.4-r3 && \ -curl https://sh.rustup.rs -sSf -o rustup.sh ; chmod +x rustup.sh ; ./rustup.sh -y ; export PATH="$PATH":/root/.cargo/bin && \ -pip install --no-cache-dir -r ./requirements.txt && \ -rm -r /root/.cargo /root/.rustup + libffi-dev=3.4.6-r0 +# Install python things (and rust temporarily) +RUN curl https://sh.rustup.rs -sSf -o rustup.sh && \ + chmod +x rustup.sh && \ + ./rustup.sh -y && \ + export PATH="$PATH:/root/.cargo/bin" && \ + pip install --no-cache-dir -r ./requirements.txt && \ + rm -r /root/.cargo /root/.rustup # Set entrypoint ENTRYPOINT [ "python3", "./prepare_job.py" ] diff --git a/docs/architecture/container_preparation.md b/docs/architecture/container_preparation.md index 17ac84c..639bd4f 100644 --- a/docs/architecture/container_preparation.md +++ b/docs/architecture/container_preparation.md @@ -17,24 +17,24 @@ sequenceDiagram Container Preparation container ->> Vault: Log-in using SVID Vault ->> Container Preparation container: Returns an authentication token (write only on client's path) Container Preparation container ->> Vault: Write private key using authentication token - Vault ->> Container Preparation container: + Vault ->> Container Preparation container: Container Preparation container ->> HPCS Server: Request creation of workloads (compute nodes, users, groups ...) 
authorized to access the key and using SVID to authenticate HPCS Server ->> Spire Server: Validate SVID - Spire Server ->> HPCS Spire Agent: + Spire Server ->> HPCS Spire Agent: HPCS Spire Agent ->> Spire Server: Validate SVID - Spire Server ->> HPCS Server: + Spire Server ->> HPCS Server: HPCS Server ->> Spire Server: Create workloads identities to access the key - Spire Server ->> HPCS Server: + Spire Server ->> HPCS Server: HPCS Server ->> Vault: Create role and policy to access the key - Vault ->> HPCS Server: + Vault ->> HPCS Server: HPCS Server ->> Container Preparation container: SpiffeID & role to access the container, path to the secret Container Preparation container ->> Container Preparation container: Parse info file based on previous steps Container Preparation container ->> Supercomputer: Ship encrypted container - Supercomputer ->> Container Preparation container: ' + Supercomputer ->> Container Preparation container: ' Container Preparation container ->> Supercomputer: Ship info file - Supercomputer ->> Container Preparation container: + Supercomputer ->> Container Preparation container: Container Preparation container -->> Spire Agent: Kills - Spire Agent -->> Container Preparation container: + Spire Agent -->> Container Preparation container: Spire Agent -->> Container Preparation container: Dies Container Preparation container -->> User: Finishes ``` diff --git a/docs/architecture/data_preparation.md b/docs/architecture/data_preparation.md index 99b3aec..bec0efe 100644 --- a/docs/architecture/data_preparation.md +++ b/docs/architecture/data_preparation.md @@ -17,24 +17,24 @@ sequenceDiagram Data Preparation container ->> Vault: Log-in using SVID Vault ->> Data Preparation container: Returns an authentication token (write only on client's path) Data Preparation container ->> Vault: Write private key using authentication token - Vault ->> Data Preparation container: + Vault ->> Data Preparation container: Data Preparation container ->> HPCS Server: 
Request creation of workloads (compute nodes, users, groups ...) authorized to access the key and using SVID to authenticate HPCS Server ->> Spire Server: Validate SVID - Spire Server ->> HPCS Spire Agent: + Spire Server ->> HPCS Spire Agent: HPCS Spire Agent ->> Spire Server: Validate SVID - Spire Server ->> HPCS Server: + Spire Server ->> HPCS Server: HPCS Server ->> Spire Server: Create workloads identities to access the key - Spire Server ->> HPCS Server: + Spire Server ->> HPCS Server: HPCS Server ->> Vault: Create role and policy to access the key - Vault ->> HPCS Server: + Vault ->> HPCS Server: HPCS Server ->> Data Preparation container: SpiffeID & role to access the container, path to the secret Data Preparation container ->> Data Preparation container: Parse info file based on previous steps Data Preparation container ->> Supercomputer: Ship encrypted containe - Supercomputer ->> Data Preparation container: + Supercomputer ->> Data Preparation container: Data Preparation container ->> Supercomputer: Ship info file - Supercomputer ->> Data Preparation container: + Supercomputer ->> Data Preparation container: Data Preparation container -->> Spire Agent: Kills - Spire Agent -->> Data Preparation container: + Spire Agent -->> Data Preparation container: Spire Agent -->> Data Preparation container: Dies Data Preparation container -->> User: Finishes ``` diff --git a/docs/architecture/job_preparation.md b/docs/architecture/job_preparation.md index b91424c..0e59488 100644 --- a/docs/architecture/job_preparation.md +++ b/docs/architecture/job_preparation.md @@ -23,10 +23,10 @@ sequenceDiagram Job Preparation container ->> Job Preparation container: Parse info from info file Job Preparation container ->> Job Preparation container: Generate SBATCH file from template based on info gathered Job Preparation container ->> Login Node: Copy SBATCH File and HPCS Configuration file - Login Node ->> Job Preparation container: + Login Node ->> Job Preparation container: Job 
Preparation container ->> Job Preparation container: Generate keypair for output data Job Preparation container ->> Login Node: Copy encryption key - Login Node ->> Job Preparation container: + Login Node ->> Job Preparation container: end rect rgb(191, 223, 255) @@ -68,13 +68,13 @@ sequenceDiagram Application container ->> Application container: Encrypt output directory Application container -->> Compute node: Finishes Compute node -->> Spire Agent: Kills - Spire Agent -->> Compute node: + Spire Agent -->> Compute node: Spire Agent -->> Compute node: Dies Compute node ->> Scheduler: Becomes available deactivate Job Preparation container end Job Preparation container ->> Login Node: Close SSH connection - Login Node ->> Job Preparation container: + Login Node ->> Job Preparation container: Login Node ->> Job Preparation container: Close SSH connection Job Preparation container -->> User: Finishes diff --git a/docs/cli/container_preparation.md b/docs/cli/container_preparation.md index 04458ab..15350ee 100644 --- a/docs/cli/container_preparation.md +++ b/docs/cli/container_preparation.md @@ -2,7 +2,7 @@ Using the cli directly isn't recommended, the supported way is through docker's entrypoint. -The container preparation cli allows the user to create/encrypt/ship a HPCS ready image based on any OCI image. +The container preparation cli allows the user to create/encrypt/ship a HPCS ready image based on any OCI image. ``` usage: prepare_container.py [-h] --base-oci-image BASE_OCI_IMAGE --sif-path SIF_PATH [--encrypted] [--docker-path DOCKER_PATH] diff --git a/docs/cli/data_preparation.md b/docs/cli/data_preparation.md index b2d156b..1a426c0 100644 --- a/docs/cli/data_preparation.md +++ b/docs/cli/data_preparation.md @@ -2,7 +2,7 @@ Using the cli directly isn't recommended, the supported way is through docker's entrypoint. -The data preparation cli allows the user to encrypt/ship an encrypted archive based on any source directory. 
+The data preparation cli allows the user to encrypt/ship an encrypted archive based on any source directory. ``` usage: prepare_data.py [-h] --input-path INPUT_PATH --output-path OUTPUT_PATH @@ -24,8 +24,8 @@ Examples python3 ./client/data_preparation/prepare_data.py --help # Run the data preparation while specifying every parameters -python3 ./client/data_preparation/prepare_data.py --input-path $(pwd)/input_data --output-path $(pwd) +python3 ./client/data_preparation/prepare_data.py --input-path $(pwd)/input_data --output-path $(pwd) # Run the data preparation while specifying every parameters (shortened version) -python3 ./client/data_preparation/prepare_data.py -i $(pwd)/input_data -o $(pwd) +python3 ./client/data_preparation/prepare_data.py -i $(pwd)/input_data -o $(pwd) ``` diff --git a/docs/cli/job_preparation.md b/docs/cli/job_preparation.md index 343bb23..b4cc362 100644 --- a/docs/cli/job_preparation.md +++ b/docs/cli/job_preparation.md @@ -60,7 +60,7 @@ Will create a slurm job with the following configuration : - As etellier - for an hour -Also : +Also : - HPCS will run the job using `/scratch/project_462000031/etellier` as its workdir - The application will be handled using info file at `/pfs/lustrep4/scratch/project_462000031/etellier/encrypted_prepared_jp2a.sif.info.yaml` - The application will be handled using info file at `/pfs/lustrep4/scratch/project_462000031/etellier/encrypted_jp2a_input.tgz.info.yaml` diff --git a/docs/configuration/client.md b/docs/configuration/client.md index c7d0c5d..e738434 100644 --- a/docs/configuration/client.md +++ b/docs/configuration/client.md @@ -8,7 +8,7 @@ Client's configuration has to respect `ini` configuration format and essentially [spire-server] address = localhost port = 31147 -trust-domain = hpcs +trust-domain = hpcs [hpcs-server] url = http://localhost:10080 @@ -23,7 +23,7 @@ username = etellier ## Reference -### `spire-server` +### `spire-server` This section describes the connection to the spire-server - 
`address` : address of the spire-server diff --git a/docs/configuration/server.md b/docs/configuration/server.md index ea3f52b..ef1a1bc 100644 --- a/docs/configuration/server.md +++ b/docs/configuration/server.md @@ -24,7 +24,7 @@ server-role = hpcs-server ## Reference -### `spire-server` +### `spire-server` This section describes the connection to the spire-server - `address` : address of the spire-server diff --git a/k8s/create-certs.yaml b/k8s/create-certs.yaml new file mode 100644 index 0000000..fbafbad --- /dev/null +++ b/k8s/create-certs.yaml @@ -0,0 +1,24 @@ +- name: Create directory for certificates if it is not there + file: + path: hpcs-stack/charts/spire/files + state: directory + mode: '0770' + +- name: Create spire-oidc private key + openssl_privatekey: + path: hpcs-stack/charts/spire/files/spire-oidc.key + size: 4096 + +- name: Create spire-oidc csr + openssl_csr: + path: hpcs-stack/charts/spire/files/spire-oidc.csr + privatekey_path: hpcs-stack/charts/spire/files/spire-oidc.key + common_name: spire-oidc + subject_alt_name: 'DNS:spire-oidc' + +- name: Create spire-oidc certificate + openssl_certificate: + provider: selfsigned + path: hpcs-stack/charts/spire/files/spire-oidc.crt + privatekey_path: hpcs-stack/charts/spire/files/spire-oidc.key + csr_path: hpcs-stack/charts/spire/files/spire-oidc.csr diff --git a/k8s/deploy-all.yaml b/k8s/deploy-all.yaml index 1011dd0..c9cb488 100644 --- a/k8s/deploy-all.yaml +++ b/k8s/deploy-all.yaml @@ -9,67 +9,7 @@ } tasks: - - name: create hpcs namespace - k8s: - state: present - src: hpcs-namespace.yaml - - - name: create spire-server account - k8s: - state: present - src: spire-server-account.yaml - - - name: create spire-server clusterrole - k8s: - state: present - src: spire-server-cluster-role.yaml - - - name: create spire-server configmap - k8s: - state: present - src: spire-server-configmap.yaml - - - name: create spire-oidc configmap - k8s: - state: present - src: spire-oidc-configmap.yaml - - - name: create 
spire nginx proxy configmap - k8s: - state: present - src: spire-server-nginx-configmap.yaml - - - name: Create spire-oidc private key - openssl_privatekey: - path: /etc/certs/hpcs-spire-oidc/selfsigned.key - size: 4096 - - - name: Create spire-oidc csr - openssl_csr: - path: /etc/certs/hpcs-spire-oidc/selfsigned.csr - privatekey_path: /etc/certs/hpcs-spire-oidc/selfsigned.key - - - name: Create spire-oidc certificate - openssl_certificate: - provider: selfsigned - path: /etc/certs/hpcs-spire-oidc/selfsigned.crt - privatekey_path: /etc/certs/hpcs-spire-oidc/selfsigned.key - csr_path: /etc/certs/hpcs-spire-oidc/selfsigned.csr - - - name: create spire-server pod (spire-server, spire-oidc, hpcs-nginx) - k8s: - state: present - src: spire-server-statefulset.yaml - - - name: create spire-server service (expose spire server port) - k8s: - state: present - src: spire-server-service.yaml - - - name: create spire-server service (expose spire oidc port) - k8s: - state: present - src: spire-oidc-service.yaml + - include_tasks: create-certs.yaml - name: Add hashicorp to helm repositories kubernetes.core.helm_repository: @@ -81,10 +21,11 @@ release_name: vault chart_ref: hashicorp/vault release_namespace: hpcs + create_namespace: true chart_version: 0.27.0 - name: Wait for vault to be created - shell: "kubectl get po -n hpcs vault-0 --output=jsonpath='{.status}'" + shell: "kubectl get --namespace hpcs pod/vault-0 --output=jsonpath='{.status}'" register: pod_ready_for_init until: (pod_ready_for_init.stdout | from_json)['containerStatuses'] is defined retries: 10 @@ -133,12 +74,27 @@ dest: /tmp/policy when: vault_init.rc == 0 + - name: Deploy hpcs-stack + kubernetes.core.helm: + release_name: hpcs-stack + chart_ref: hpcs-stack + release_namespace: hpcs + create_namespace: true + + - name: Wait for spire-oidc to be ready + shell: "kubectl get --namespace hpcs pod/spire-server-0 --output=jsonpath='{.status.containerStatuses[*].ready}'" + register: pod_spire_oidc + until: 
pod_spire_oidc.stdout == "true true true" + # until: (pod_spire_oidc.stdout | from_json)['containerStatuses'][?name==spire-oidc]['ready'] + retries: 10 + delay: 2 + - name: Copy oidc cert to vault's pod kubernetes.core.k8s_cp: namespace: hpcs pod: vault-0 remote_path: /tmp/cert - local_path: /etc/certs/hpcs-spire-oidc/selfsigned.crt + local_path: hpcs-stack/charts/spire/files/spire-oidc.crt when: vault_init.rc == 0 - name: Write oidc config to vault @@ -186,7 +142,7 @@ namespace: hpcs pod: spire-server-0 container: spire-server - command: ./bin/spire-server entry create -parentID spiffe://hpcs/spire/agent/k8s_psat/{{ (kubectl_node_info.stdout | from_json)['items'][0]['metadata']['name'] }}/{{ (kubectl_node_info.stdout | from_json)['items'][0]['metadata']['uid'] }} -spiffeID spiffe://hpcs/hpcs-server/workload -selector unix:uid:0 + command: ./bin/spire-server entry create -parentID spiffe://hpcs/spire/agent/k8s_psat/hpcs/{{ (kubectl_node_info.stdout | from_json)['items'][0]['metadata']['uid'] }} -spiffeID spiffe://hpcs/hpcs-server/workload -selector unix:uid:0 register: cgroups_check when: cgroups_check.rc == 0 ignore_errors: True @@ -196,51 +152,7 @@ namespace: hpcs pod: spire-server-0 container: spire-server - command: ./bin/spire-server entry create -parentID spiffe://hpcs/spire/agent/k8s_psat/{{ (kubectl_node_info.stdout | from_json)['items'][0]['metadata']['name'] }}/{{ (kubectl_node_info.stdout | from_json)['items'][0]['metadata']['uid'] }} -spiffeID spiffe://hpcs/hpcs-server/workload -selector k8s:pod-name:hpcs-server + command: ./bin/spire-server entry create -parentID spiffe://hpcs/spire/agent/k8s_psat/hpcs/{{ (kubectl_node_info.stdout | from_json)['items'][0]['metadata']['uid'] }} -spiffeID spiffe://hpcs/hpcs-server/workload -selector k8s:pod-name:hpcs-server register: cgroups_check when: cgroups_check.rc == 1 ignore_errors: True - - - name: Expose vault's web port - kubernetes.core.k8s_service: - state: present - name: vault-external - type: NodePort - 
namespace: hpcs - ports: - - port: 8200 - protocol: TCP - selector: - service: vault - - - name: Create hpcs-server account - k8s: - state: present - src: hpcs-server-account.yaml - - - name: Create hpcs-spire account - k8s: - state: present - src: hpcs-spire-account.yaml - - - name: Create hpcs-server configmap - k8s: - state: present - src: hpcs-server-configmap.yaml - - - name: Create hpcs-server statefulset and pod - k8s: - state: present - src: hpcs-server-statefulset.yaml - - - name: Expose hpcs-server's web port - kubernetes.core.k8s_service: - state: present - name: hpcs-external - type: NodePort - namespace: hpcs - ports: - - port: 10080 - protocol: TCP - selector: - service: hpcs-server diff --git a/k8s/hpcs-namespace.yaml b/k8s/hpcs-namespace.yaml deleted file mode 100644 index 8280228..0000000 --- a/k8s/hpcs-namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: hpcs diff --git a/k8s/hpcs-stack/.helmignore b/k8s/hpcs-stack/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/k8s/hpcs-stack/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/k8s/hpcs-stack/Chart.yaml b/k8s/hpcs-stack/Chart.yaml new file mode 100644 index 0000000..ad9b8ff --- /dev/null +++ b/k8s/hpcs-stack/Chart.yaml @@ -0,0 +1,27 @@ +apiVersion: v2 +name: hpcs-stack +description: HPCS stack +dependencies: + - name: hpcs + - name: spire + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. 
+# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.2.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.2.0" diff --git a/k8s/hpcs-stack/charts/hpcs/.helmignore b/k8s/hpcs-stack/charts/hpcs/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/k8s/hpcs-stack/charts/hpcs/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/k8s/hpcs-stack/charts/hpcs/Chart.yaml b/k8s/hpcs-stack/charts/hpcs/Chart.yaml new file mode 100644 index 0000000..1308773 --- /dev/null +++ b/k8s/hpcs-stack/charts/hpcs/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: hpcs +description: HPCS server + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. 
+# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.1" diff --git a/k8s/hpcs-server-account.yaml b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-account.yaml similarity index 78% rename from k8s/hpcs-server-account.yaml rename to k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-account.yaml index e51e313..873487b 100644 --- a/k8s/hpcs-server-account.yaml +++ b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-account.yaml @@ -2,4 +2,3 @@ apiVersion: v1 kind: ServiceAccount metadata: name: hpcs-server - namespace: hpcs diff --git a/k8s/hpcs-server-configmap.yaml b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-configmap.yaml similarity index 65% rename from k8s/hpcs-server-configmap.yaml rename to k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-configmap.yaml index f58e239..4a59f55 100644 --- a/k8s/hpcs-server-configmap.yaml +++ b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-configmap.yaml @@ -2,13 +2,12 @@ apiVersion: v1 kind: ConfigMap metadata: name: hpcs-server - namespace: hpcs data: hpcs-server.conf: | [spire-server] - address = localhost - port = 8081 - trust-domain = hpcs + address = {{ 
.Values.server.spire.address }} + port = {{ .Values.server.spire.port }} + trust-domain = {{ .Values.server.spire.trustDomain }} pre-command = "" spire-server-bin = spire-server socket-path = /var/run/sockets/server/api.sock @@ -17,16 +16,16 @@ data: spire-agent-socket = /run/sockets/agent/agent.sock [vault] - url = http://vault:8200 - server-role = hpcs-server + url = {{ .Values.server.vault.address }}:{{ .Values.server.vault.port }} + server-role = {{ .Values.server.vault.role }} agent.conf: | agent { data_dir = "./data/agent" - log_level = "DEBUG" - trust_domain = "hpcs" - server_address = "spire-server" - server_port = 8081 + log_level = "{{ .Values.agent.logLevel }}" + trust_domain = "{{ .Values.server.spire.trustDomain }}" + server_address = "{{ .Values.agent.server.address }}" + server_port = {{ .Values.agent.server.port }} socket_path = "/var/run/sockets/agent/agent.sock" admin_socket_path = "/var/run/sockets/admin/admin.sock" @@ -44,7 +43,7 @@ data: NodeAttestor "k8s_psat" { plugin_data { - cluster = "docker-desktop" + cluster = "{{ .Values.agent.clusterName }}" } } diff --git a/k8s/hpcs-server-statefulset.yaml b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-statefulset.yaml similarity index 84% rename from k8s/hpcs-server-statefulset.yaml rename to k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-statefulset.yaml index 276f876..8bc8a24 100644 --- a/k8s/hpcs-server-statefulset.yaml +++ b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-server-statefulset.yaml @@ -2,7 +2,6 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: hpcs-server - namespace: hpcs labels: app: hpcs-server spec: @@ -13,7 +12,6 @@ spec: serviceName: hpcs-server template: metadata: - namespace: hpcs labels: app: hpcs-server spec: @@ -21,9 +19,10 @@ spec: shareProcessNamespace: true containers: - name: hpcs-server - image: ghcr.io/cscfi/hpcs/server:0.1.1 + image: {{ .Values.image.repository }}/server:{{ .Values.image.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ 
.Values.image.pullPolicy }} ports: - - containerPort: 10080 + - containerPort: {{ .Values.server.port }} name: hpcs-server volumeMounts: - name: hpcs-server-configs @@ -53,10 +52,9 @@ spec: volumeClaimTemplates: - metadata: name: spire-agent-data - namespace: hpcs spec: accessModes: - ReadWriteOnce resources: requests: - storage: 1Gi + storage: {{ .Values.storageSize }} diff --git a/k8s/hpcs-spire-account.yaml b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-spire-account.yaml similarity index 78% rename from k8s/hpcs-spire-account.yaml rename to k8s/hpcs-stack/charts/hpcs/templates/hpcs-spire-account.yaml index 690a8a6..2b4e064 100644 --- a/k8s/hpcs-spire-account.yaml +++ b/k8s/hpcs-stack/charts/hpcs/templates/hpcs-spire-account.yaml @@ -2,4 +2,3 @@ apiVersion: v1 kind: ServiceAccount metadata: name: hpcs-spire - namespace: hpcs diff --git a/k8s/hpcs-stack/charts/hpcs/values.yaml b/k8s/hpcs-stack/charts/hpcs/values.yaml new file mode 100644 index 0000000..12eb15f --- /dev/null +++ b/k8s/hpcs-stack/charts/hpcs/values.yaml @@ -0,0 +1,22 @@ +image: + repository: ghcr.io/cscfi/hpcs + # tag of the image, defaults to .Chart.AppVersion + # tag: + pullPolicy: IfNotPresent +server: + port: 10080 + spire: + address: localhost + port: 8081 + trustDomain: hpcs + vault: + address: http://vault + port: 8200 + role: hpcs-server +agent: + logLevel: DEBUG + server: + address: spire-server + port: 8081 + clusterName: hpcs +storageSize: "1Gi" diff --git a/k8s/hpcs-stack/charts/spire/.helmignore b/k8s/hpcs-stack/charts/spire/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/k8s/hpcs-stack/charts/spire/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/k8s/hpcs-stack/charts/spire/Chart.yaml b/k8s/hpcs-stack/charts/spire/Chart.yaml new file mode 100644 index 0000000..64f7852 --- /dev/null +++ b/k8s/hpcs-stack/charts/spire/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: spire +description: Spire server + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.9.0" diff --git a/k8s/hpcs-stack/charts/spire/templates/certificates.yaml b/k8s/hpcs-stack/charts/spire/templates/certificates.yaml new file mode 100644 index 0000000..3940d53 --- /dev/null +++ b/k8s/hpcs-stack/charts/spire/templates/certificates.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + namespace: hpcs + name: spire-oidc-certs +type: kubernetes.io/tls +stringData: + tls.crt: | + {{- $.Files.Get "files/spire-oidc.crt" | nindent 4 }} + tls.key: | + {{- $.Files.Get "files/spire-oidc.key" | nindent 4 }} diff --git a/k8s/spire-oidc-configmap.yaml b/k8s/hpcs-stack/charts/spire/templates/spire-oidc-configmap.yaml similarity index 86% rename from k8s/spire-oidc-configmap.yaml rename to k8s/hpcs-stack/charts/spire/templates/spire-oidc-configmap.yaml index 9de2be7..6cc994a 100644 --- a/k8s/spire-oidc-configmap.yaml +++ b/k8s/hpcs-stack/charts/spire/templates/spire-oidc-configmap.yaml @@ -2,10 +2,9 @@ apiVersion: v1 kind: ConfigMap metadata: name: spire-oidc - namespace: hpcs data: oidc-discovery-provider.conf: | - log_level = "debug" + log_level = "{{ .Values.oidc.logLevel }}" domains = ["spire-oidc"] listen_socket_path = "/tmp/spire-server/private/oidc-api.sock" diff --git a/k8s/spire-server-account.yaml b/k8s/hpcs-stack/charts/spire/templates/spire-server-account.yaml similarity index 78% rename from k8s/spire-server-account.yaml rename to k8s/hpcs-stack/charts/spire/templates/spire-server-account.yaml index 2135836..acf9944 100644 --- a/k8s/spire-server-account.yaml +++ b/k8s/hpcs-stack/charts/spire/templates/spire-server-account.yaml @@ -2,4 +2,3 @@ apiVersion: v1 kind: ServiceAccount metadata: name: spire-server - namespace: hpcs diff --git a/k8s/spire-server-cluster-role.yaml b/k8s/hpcs-stack/charts/spire/templates/spire-server-cluster-role.yaml similarity index 95% rename from k8s/spire-server-cluster-role.yaml rename to k8s/hpcs-stack/charts/spire/templates/spire-server-cluster-role.yaml index 41defa1..035f13e 100644 --- 
a/k8s/spire-server-cluster-role.yaml +++ b/k8s/hpcs-stack/charts/spire/templates/spire-server-cluster-role.yaml @@ -11,7 +11,6 @@ rules: - apiGroups: [""] resources: ["configmaps","pods","nodes"] verbs: ["patch", "get", "list"] - --- # Binds above cluster role to spire-server service account kind: ClusterRoleBinding @@ -21,7 +20,7 @@ metadata: subjects: - kind: ServiceAccount name: spire-server - namespace: hpcs + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: spire-server-trust-role diff --git a/k8s/spire-server-configmap.yaml b/k8s/hpcs-stack/charts/spire/templates/spire-server-configmap.yaml similarity index 67% rename from k8s/spire-server-configmap.yaml rename to k8s/hpcs-stack/charts/spire/templates/spire-server-configmap.yaml index 6a9a079..b072340 100644 --- a/k8s/spire-server-configmap.yaml +++ b/k8s/hpcs-stack/charts/spire/templates/spire-server-configmap.yaml @@ -1,29 +1,25 @@ apiVersion: v1 - kind: ConfigMap metadata: name: spire-bundle - namespace: hpcs - --- apiVersion: v1 kind: ConfigMap metadata: name: spire-server - namespace: hpcs data: server.conf: | server { - bind_address = "0.0.0.0" - bind_port = "8081" + bind_address = "{{ .Values.server.bindAddress }}" + bind_port = "{{ .Values.server.port }}" socket_path = "/tmp/spire-server/private/api.sock" - trust_domain = "hpcs" + trust_domain = "{{ .Values.server.trustDomain }}" data_dir = "/run/spire/data" - log_level = "DEBUG" - ca_key_type = "rsa-2048" + log_level = "{{ .Values.server.logLevel }}" + ca_key_type = "{{ .Values.server.caKeyType }}" jwt_issuer = "spire-server" - default_jwt_svid_ttl = "1h" + default_jwt_svid_ttl = "{{ .Values.server.defaultJWTttl }}" ca_subject = { country = ["US"], @@ -43,7 +39,7 @@ data: NodeAttestor "k8s_psat" { plugin_data { clusters = { - "docker-desktop" = { + "{{ .Values.server.clusterName }}" = { use_token_review_api_validation = true service_account_allow_list = ["hpcs:hpcs-server"] } @@ -59,15 +55,15 @@ data: Notifier "k8sbundle" { 
plugin_data { - namespace = "hpcs" + namespace = "{{ .Release.Namespace }}" } } } health_checks { listener_enabled = true - bind_address = "0.0.0.0" - bind_port = "8080" + bind_address = "{{ .Values.server.health.bindAddress }}" + bind_port = "{{ .Values.server.health.port }}" live_path = "/live" ready_path = "/ready" } diff --git a/k8s/spire-server-nginx-configmap.yaml b/k8s/hpcs-stack/charts/spire/templates/spire-server-nginx-configmap.yaml similarity index 76% rename from k8s/spire-server-nginx-configmap.yaml rename to k8s/hpcs-stack/charts/spire/templates/spire-server-nginx-configmap.yaml index 0eba7ec..2c1b75e 100644 --- a/k8s/spire-server-nginx-configmap.yaml +++ b/k8s/hpcs-stack/charts/spire/templates/spire-server-nginx-configmap.yaml @@ -2,7 +2,6 @@ apiVersion: v1 kind: ConfigMap metadata: name: hpcs-nginx - namespace: hpcs data: nginx.conf: | events {} @@ -15,9 +14,9 @@ data: } server{ - listen 443 ssl; - ssl_certificate /certs/selfsigned.crt; - ssl_certificate_key /certs/selfsigned.key; + listen {{ .Values.oidc.port }} ssl; + ssl_certificate /certs/tls.crt; + ssl_certificate_key /certs/tls.key; ssl_protocols TLSv1 TLSv1.1 TLSv1.2; ssl_ciphers HIGH:!aNULL:!MD5; location / { diff --git a/k8s/spire-server-statefulset.yaml b/k8s/hpcs-stack/charts/spire/templates/spire-server-statefulset.yaml similarity index 78% rename from k8s/spire-server-statefulset.yaml rename to k8s/hpcs-stack/charts/spire/templates/spire-server-statefulset.yaml index f1baa0d..2dd4dcb 100644 --- a/k8s/spire-server-statefulset.yaml +++ b/k8s/hpcs-stack/charts/spire/templates/spire-server-statefulset.yaml @@ -2,7 +2,6 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: spire-server - namespace: hpcs labels: app: spire-server spec: @@ -13,7 +12,6 @@ spec: serviceName: spire-server template: metadata: - namespace: hpcs labels: app: spire-server spec: @@ -33,15 +31,16 @@ spec: mountPath: /certs readOnly: true ports: - - containerPort: 443 + - containerPort: {{ .Values.oidc.port }} name: 
hpcs-nginx - name: spire-server - image: ghcr.io/spiffe/spire-server:1.9.0 + image: {{ .Values.server.image.repository }}/spire-server:{{ .Values.server.image.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.server.image.pullPolicy }} args: - -config - /run/spire/config/server.conf ports: - - containerPort: 8081 + - containerPort: {{ .Values.server.port }} name: spire-server volumeMounts: - name: spire-config @@ -56,7 +55,7 @@ spec: livenessProbe: httpGet: path: /live - port: 8080 + port: {{ .Values.server.health.port }} failureThreshold: 2 initialDelaySeconds: 15 periodSeconds: 60 @@ -64,11 +63,12 @@ spec: readinessProbe: httpGet: path: /ready - port: 8080 + port: {{ .Values.server.health.port }} initialDelaySeconds: 5 periodSeconds: 5 - name: spire-oidc - image: ghcr.io/spiffe/oidc-discovery-provider:1.9.0 + image: {{ .Values.server.image.repository }}/oidc-discovery-provider:{{ .Values.server.image.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.server.image.pullPolicy }} args: - -config - /run/spire/oidc/config/oidc-discovery-provider.conf @@ -85,7 +85,7 @@ spec: readinessProbe: httpGet: path: /ready - port: 8008 + port: {{ .Values.server.health.port }} initialDelaySeconds: 5 periodSeconds: 5 timeoutSeconds: 3 @@ -104,16 +104,14 @@ spec: configMap: name: spire-oidc - name: nginx-certs - hostPath: - path: /etc/certs/hpcs-spire-oidc - type: DirectoryOrCreate + secret: + secretName: spire-oidc-certs volumeClaimTemplates: - metadata: name: spire-data - namespace: hpcs spec: accessModes: - ReadWriteOnce resources: requests: - storage: 1Gi + storage: {{ .Values.server.storageSize }} diff --git a/k8s/hpcs-stack/charts/spire/values.yaml b/k8s/hpcs-stack/charts/spire/values.yaml new file mode 100644 index 0000000..b1f0a4f --- /dev/null +++ b/k8s/hpcs-stack/charts/spire/values.yaml @@ -0,0 +1,21 @@ +oidc: + logLevel: debug + port: 443 +server: + image: + repository: ghcr.io/spiffe + # tag of the image, defaults to .Chart.AppVersion + # 
tag: + pullPolicy: IfNotPresent + + bindAddress: "0.0.0.0" + port: 8081 + trustDomain: hpcs + logLevel: DEBUG + caKeyType: rsa-4096 + defaultJWTttl: "1h" + clusterName: hpcs + health: + bindAddress: "0.0.0.0" + port: 8080 + storageSize: "1G" diff --git a/k8s/hpcs-server-service.yaml b/k8s/hpcs-stack/templates/hpcs-server-service.yaml similarity index 51% rename from k8s/hpcs-server-service.yaml rename to k8s/hpcs-stack/templates/hpcs-server-service.yaml index 59d45fc..85fceaf 100644 --- a/k8s/hpcs-server-service.yaml +++ b/k8s/hpcs-stack/templates/hpcs-server-service.yaml @@ -3,12 +3,14 @@ apiVersion: v1 kind: Service metadata: name: hpcs-server - namespace: hpcs spec: - clusterIP: None + type: {{ .Values.hpcs.service.type }} selector: app: hpcs-server ports: - name: https - port: 10080 + port: {{ .Values.hpcs.server.port }} targetPort: hpcs-server + {{- if eq .Values.hpcs.service.type "NodePort" }} + nodePort: {{ .Values.hpcs.service.port }} + {{- end -}} diff --git a/k8s/hpcs-stack/templates/spire-oidc-service.yaml b/k8s/hpcs-stack/templates/spire-oidc-service.yaml new file mode 100644 index 0000000..385fc88 --- /dev/null +++ b/k8s/hpcs-stack/templates/spire-oidc-service.yaml @@ -0,0 +1,16 @@ +# Service definition for spire-oidc (expose the OIDC socket) +apiVersion: v1 +kind: Service +metadata: + name: spire-oidc +spec: + type: {{ .Values.spire.oidc.service.type }} + selector: + app: spire-server + ports: + - name: https + port: {{ .Values.spire.oidc.port }} + targetPort: hpcs-nginx + {{- if eq .Values.spire.oidc.service.type "NodePort" }} + nodePort: {{ .Values.spire.oidc.service.port }} + {{- end -}} diff --git a/k8s/hpcs-stack/templates/spire-server-service.yaml b/k8s/hpcs-stack/templates/spire-server-service.yaml new file mode 100644 index 0000000..27c0139 --- /dev/null +++ b/k8s/hpcs-stack/templates/spire-server-service.yaml @@ -0,0 +1,16 @@ +# Service definition for spire server +apiVersion: v1 +kind: Service +metadata: + name: spire-server +spec: + type: 
{{ .Values.spire.server.service.type }} + selector: + app: spire-server + ports: + - name: tcp-spire + port: {{ .Values.spire.server.port }} + targetPort: spire-server + {{- if eq .Values.spire.server.service.type "NodePort" }} + nodePort: {{ .Values.spire.server.service.port }} + {{- end -}} diff --git a/k8s/hpcs-stack/templates/vault-service.yaml b/k8s/hpcs-stack/templates/vault-service.yaml new file mode 100644 index 0000000..3492b44 --- /dev/null +++ b/k8s/hpcs-stack/templates/vault-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.vaultService.present }} +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: vault + app.kubernetes.io/name: vault + name: vault-external +spec: + ports: + - name: port-1 + nodePort: {{ .Values.vaultService.nodePort }} + port: {{ .Values.hpcs.server.vault.port }} + protocol: TCP + selector: + app.kubernetes.io/instance: vault + app.kubernetes.io/name: vault + component: server + type: NodePort +{{- end }} diff --git a/k8s/hpcs-stack/values.yaml b/k8s/hpcs-stack/values.yaml new file mode 100644 index 0000000..704d326 --- /dev/null +++ b/k8s/hpcs-stack/values.yaml @@ -0,0 +1,57 @@ +spire: + oidc: + logLevel: debug + port: 443 + service: + type: NodePort + port: 30001 + server: + image: + repository: ghcr.io/spiffe + # tag of the image, defaults to .Chart.AppVersion + # tag: + pullPolicy: IfNotPresent + + bindAddress: "0.0.0.0" + port: 8081 + trustDomain: hpcs + logLevel: DEBUG + caKeyType: rsa-4096 + defaultJWTttl: "1h" + clusterName: hpcs + health: + bindAddress: "0.0.0.0" + port: 8080 + storageSize: "1G" + service: + type: NodePort + port: 30002 +hpcs: + image: + repository: ghcr.io/cscfi/hpcs + # tag of the image, defaults to .Chart.AppVersion + # tag: + pullPolicy: IfNotPresent + server: + port: 10080 + spire: + address: localhost + port: 8081 + trustDomain: hpcs + vault: + address: http://vault + port: 8200 + role: hpcs-server + agent: + logLevel: DEBUG + server: + address: spire-server + port: 8081 + 
clusterName: hpcs + storageSize: "1Gi" + service: + type: NodePort + port: 30003 +vaultService: + present: true + nodePort: 30004 diff --git a/k8s/spire-oidc-service.yaml b/k8s/spire-oidc-service.yaml deleted file mode 100644 index c425c1e..0000000 --- a/k8s/spire-oidc-service.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Service definition for spire-oidc (expose the OIDC socket) -apiVersion: v1 -kind: Service -metadata: - name: spire-oidc - namespace: hpcs -spec: - type: LoadBalancer - selector: - app: spire-server - ports: - - name: https - port: 443 - targetPort: hpcs-nginx diff --git a/k8s/spire-server-service.yaml b/k8s/spire-server-service.yaml deleted file mode 100644 index 3e2baf2..0000000 --- a/k8s/spire-server-service.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Service definition for spire server -apiVersion: v1 -kind: Service -metadata: - name: spire-server - namespace: hpcs -spec: - type: LoadBalancer - selector: - app: spire-server - ports: - - name: tcp-spire - port: 8081 - targetPort: spire-server diff --git a/server/Dockerfile b/server/Dockerfile index da5ff95..1310111 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -4,14 +4,18 @@ FROM --platform=$BUILDPLATFORM python:3.9-alpine # Install necessary packages and spire-agent RUN apk add --no-cache \ - git=2.43.0-r0 \ + git=2.45.2-r0 \ + curl=8.11.0-r2 \ build-base=0.5-r3 \ - openssl=3.1.4-r5 && \ -wget -q https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz && \ -tar xvf spire-1.9.0-linux-amd64-musl.tar.gz ; mv spire-1.9.0 /opt ; mv /opt/spire-1.9.0 /opt/spire && \ -ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent && \ -ln -s /opt/spire/bin/spire-server /usr/bin/spire-server && \ -rm -rf spire-1.9.0-linux-amd64-musl.tar.gz + openssl=3.3.2-r1 +# Install spire-agent +RUN curl -LsSf -o spire-1.9.0-linux-amd64-musl.tar.gz https://github.com/spiffe/spire/releases/download/v1.9.0/spire-1.9.0-linux-amd64-musl.tar.gz && \ + tar xvf spire-1.9.0-linux-amd64-musl.tar.gz 
&& \ +  mv spire-1.9.0 /opt && \ +  mv /opt/spire-1.9.0 /opt/spire && \ +  ln -s /opt/spire/bin/spire-agent /usr/bin/spire-agent && \ +  ln -s /opt/spire/bin/spire-server /usr/bin/spire-server && \ +  rm -rf spire-1.9.0-linux-amd64-musl.tar.gz # Copy server COPY ./server /server @@ -21,7 +25,7 @@ WORKDIR /server # Install dependencies RUN pip install --no-cache-dir -r ./requirements.txt && \ -pip install --no-cache-dir git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 +  pip install --no-cache-dir git+https://github.com/HewlettPackard/py-spiffe.git@3640af9d6629c05e027f99010abc934cb74122a8 # Copy utils COPY ./utils /server/utils diff --git a/server/entrypoint.sh b/server/entrypoint.sh index 5aac004..9fe6637 100755 --- a/server/entrypoint.sh +++ b/server/entrypoint.sh @@ -21,11 +21,14 @@ rm -rf /tmp/data spire-agent run -config /tmp/agent.conf || end_entrypoint 0 1 & spire_agent_pid=$! -agent_socket_path=$(cat /tmp/agent.conf | grep "socket_path" | cut -d "=" -f2 | cut -d '"' -f1) +agent_socket_path=$(grep "socket_path" /tmp/agent.conf | cut -d "=" -f2 | cut -d '"' -f1) + +RED='\033[0;31m' +NC='\033[0m' sleep 10 -until [ -e $agent_socket_path ]; do -  echo -e "${RED}[LUMI-SD][Data preparation] Spire workload api socket doesn't exist, waiting 10 seconds ${NC}" +until [ -e "${agent_socket_path}" ]; do +  printf "%b[LUMI-SD][Data preparation] Spire workload api socket doesn't exist, waiting 10 seconds %b" "${RED}" "${NC}\n" sleep 10 done diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 0000000..43937f8 --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,108 @@ +# kind VM recipe + +Recipe to deploy a simple VM with a running [kind](https://kind.sigs.k8s.io/) in Pouta. + +## VM deployment + +The VM is defined in [Terraform](https://www.terraform.io/) with state stored in `-terraform-state` bucket deployed under your project in Allas.
+ +To deploy/update, download a config file from Pouta for authentication (the `-openrc.sh`). +You will also need `S3` credentials for accessing the bucket, in the below recipe it assumes you have them nicely stored in [pass](https://www.passwordstore.org/). +Currently the VM also needs 2 secrets: + +- host SSH private key +- host SSH public key (not really secret but we have it classified as such) + +Code is looking for them in the following locations: + +- `secrets/ssh_host_ed25519_key` +- `secrets/ssh_host_ed25519_key.pub` + +After cloning the repository unlock the secrets with + +    -> git-crypt unlock + +Put public SSH keys with admin access to the `secrets/public_keys` file. +If you want some users to have just access to tunnel ports from the VM, add their keys to the `secrets/tunnel_keys` file, if not just `touch secrets/tunnel_keys`. +After both of those files are present, you should be able to deploy the VM. +Authenticate first: + +    # authenticate +    -> source project_2007468-openrc.sh +    # for simplicity of this example we just export S3 credentials +    -> export AWS_ACCESS_KEY_ID=$(pass fancy_project/aws_key) +    -> export AWS_SECRET_ACCESS_KEY=$(pass fancy_project/aws_secret) + +For a clean environment on the backend, instance name is defined using an included script `set-name.sh`. +Backend doesn't allow to just use variables for the backend file name, that is why we need to define it before executing `Terraform`. + +    -> ./set-name.sh --name kitten --project     Call: terraform init [-reconfigure] -backend-config=tf-backend.tfvars +    -> terraform init -reconfigure -backend-config=tf-backend.tfvars + +Now you can just deploy + +    -> terraform apply + +And wait for things to finish, including package updates and installations on the VM. +As one of the outputs you should see the address of your VM, e.g.: + +    Outputs: + +    address = "128.214.254.127" + +## Connecting to kind + +It takes a few moments for everything to finish setting up on the VM.
+Once it finishes, the VM should be running a configured `kind` cluster with a dashboard running. +You can download your config file and access the cluster, notice the access to the API is restricted to trusted networks only: + +    -> scp ubuntu@128.214.254.127:.kube/remote-config . +    -> export KUBECONFIG=$(pwd)/remote-config +    -> kubectl auth whoami +    ATTRIBUTE VALUE +    Username kubernetes-admin +    Groups [kubeadm:cluster-admins system:authenticated] + +To, for example, check if the dashboard is ready + +    -> kubectl get all --namespace kubernetes-dashboard +    NAME READY STATUS RESTARTS AGE +    pod/kubernetes-dashboard-api-5cd64dbc99-xjbj8 1/1 Running 0 2m54s +    pod/kubernetes-dashboard-auth-5c8859fcbd-zt2lm 1/1 Running 0 2m54s +    pod/kubernetes-dashboard-kong-57d45c4f69-5gv2d 1/1 Running 0 2m54s +    pod/kubernetes-dashboard-metrics-scraper-df869c886-chxx4 1/1 Running 0 2m54s +    pod/kubernetes-dashboard-web-6ccf8d967-fsctp 1/1 Running 0 2m54s + +    NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +    service/kubernetes-dashboard-api ClusterIP 10.96.149.208 8000/TCP 2m55s +    service/kubernetes-dashboard-auth ClusterIP 10.96.140.195 8000/TCP 2m55s +    service/kubernetes-dashboard-kong-proxy ClusterIP 10.96.35.136 443/TCP 2m55s +    service/kubernetes-dashboard-metrics-scraper ClusterIP 10.96.222.176 8000/TCP 2m55s +    service/kubernetes-dashboard-web ClusterIP 10.96.139.1 8000/TCP 2m55s + +    NAME READY UP-TO-DATE AVAILABLE AGE +    deployment.apps/kubernetes-dashboard-api 1/1 1 1 2m54s +    deployment.apps/kubernetes-dashboard-auth 1/1 1 1 2m54s +    deployment.apps/kubernetes-dashboard-kong 1/1 1 1 2m54s +    deployment.apps/kubernetes-dashboard-metrics-scraper 1/1 1 1 2m54s +    deployment.apps/kubernetes-dashboard-web 1/1 1 1 2m54s + +    NAME DESIRED CURRENT READY AGE +    replicaset.apps/kubernetes-dashboard-api-5cd64dbc99 1 1 1 2m54s +    replicaset.apps/kubernetes-dashboard-auth-5c8859fcbd 1 1 1 2m54s +    replicaset.apps/kubernetes-dashboard-kong-57d45c4f69 1 1 1 2m54s + 
replicaset.apps/kubernetes-dashboard-metrics-scraper-df869c886 1 1 1 2m54s +    replicaset.apps/kubernetes-dashboard-web-6ccf8d967 1 1 1 2m54s + +Dashboard by default in this case is not overly secure so the external route is not set up, to access: + +    # Generate a token to login to the dashboard with +    -> kubectl -n kubernetes-dashboard create token admin-user +    # Forward the dashboard to your machine +    -> kubectl -n kubernetes-dashboard port-forward svc/kubernetes-dashboard-kong-proxy 8443:443 +    Forwarding from 127.0.0.1:8443 -> 8443 +    Forwarding from [::1]:8443 -> 8443 + +And view the dashboard in your browser under `https://localhost:8443` using the generated token to login. +Note that the cluster and the dashboard use a self-signed certificate so your browser is not going to like it. diff --git a/terraform/cloud-config.yaml b/terraform/cloud-config.yaml new file mode 100644 index 0000000..f17ed6a --- /dev/null +++ b/terraform/cloud-config.yaml @@ -0,0 +1,109 @@ +#cloud-config +package_update: true +package_upgrade: true +package_reboot_if_required: true +apt: + sources: + docker.list: + source: deb [arch=amd64] https://download.docker.com/linux/ubuntu $RELEASE stable + keyid: 9DC858229FC7DD38854AE2D88D81803C0EBFCD88 + helm.list: + source: deb [arch=amd64] https://baltocdn.com/helm/stable/debian/ all main + keyid: 81BF832E2F19CD2AA0471959294AC4827C1A168A # https://baltocdn.com/helm/signing.asc +packages: +- ca-certificates +- containerd.io +- curl +- docker-ce +- docker-ce-cli +- gnupg +- helm +- lsb-release +- uidmap +- net-tools +- yq +# fun utils +- git +- tmux +- wget +groups: +- docker +users: +- name: ubuntu + lock_passwd: true + shell: /bin/bash + ssh_authorized_keys: +%{ for key in public_keys ~} + - ${key} +%{ endfor ~} + groups: + - docker + - sudo + sudo: + - ALL=(ALL) NOPASSWD:ALL +- name: k8s-api + lock_passwd: true + shell: /usr/sbin/nologin + ssh_authorized_keys: +%{ for key in public_keys ~} + - ${key} +%{ endfor ~} +%{ for key in tunnel_keys ~} + - 
${key} +%{ endfor ~} +ssh_genkeytypes: +- ed25519 +ssh_keys: + ed25519_private: | + ${ed25519_private} + ed25519_public: ${ed25519_public} +runcmd: +- systemctl disable --now docker.service docker.socket +- rm -f /var/run/docker.sock +- loginctl enable-linger ubuntu +- chown ubuntu:root /home/ubuntu # in some versions docker setup has problems without it +- su - ubuntu -c '/usr/local/sbin/setup.sh' +write_files: +- encoding: b64 + content: ${setup_sha512} + owner: root:root + path: /etc/setup-sha512 +- content: net.ipv4.ip_unprivileged_port_start=80 + path: /etc/sysctl.d/unprivileged_port_start.conf +- encoding: b64 + content: ${setup_sh} + owner: root:root + path: /usr/local/sbin/setup.sh + permissions: '0755' +- encoding: b64 + content: ${hpcs_cluster_yaml} + owner: root:root + path: /etc/hpcs/hpcs-cluster.yaml + permissions: '0644' +- encoding: b64 + content: ${kind_dashboard_admin_yaml} + owner: root:root + path: /etc/hpcs/admin-user.yaml + permissions: '0644' +- source: + uri: https://kind.sigs.k8s.io/dl/v0.24.0/kind-Linux-amd64 + owner: root:root + path: /usr/bin/kind + permissions: '0755' +- source: + uri: https://dl.k8s.io/v1.31.2/bin/linux/amd64/kubectl + owner: root:root + path: /usr/bin/kubectl + permissions: '0755' +fs_setup: +- label: data + filesystem: 'ext4' + device: /dev/vdb + overwrite: false +- label: docker + filesystem: 'ext4' + device: /dev/vdc + overwrite: false +mounts: +- ['LABEL=data', /var/lib/data, "ext4", "defaults"] +- ['LABEL=docker', /var/lib/docker, "ext4", "defaults"] diff --git a/terraform/files/admin-user.yaml b/terraform/files/admin-user.yaml new file mode 100644 index 0000000..9f24303 --- /dev/null +++ b/terraform/files/admin-user.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: admin-user + namespace: kubernetes-dashboard +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: admin-user +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: 
cluster-admin +subjects: +- kind: ServiceAccount + name: admin-user + namespace: kubernetes-dashboard diff --git a/terraform/files/hpcs-cluster.yaml b/terraform/files/hpcs-cluster.yaml new file mode 100644 index 0000000..78f4e3d --- /dev/null +++ b/terraform/files/hpcs-cluster.yaml @@ -0,0 +1,39 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: hpcs +networking: + apiServerAddress: 0.0.0.0 + apiServerPort: 6444 +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + authorization-mode: "AlwaysAllow" + extraPortMappings: + - containerPort: 80 + hostPort: 80 + - containerPort: 443 + hostPort: 443 + - containerPort: 30001 + hostPort: 30001 + - containerPort: 30002 + hostPort: 30002 + - containerPort: 30003 + hostPort: 30003 + - containerPort: 30004 + hostPort: 30004 + kubeadmConfigPatchesJSON6902: + - group: kubeadm.k8s.io + version: v1beta3 + kind: ClusterConfiguration + patch: | + - op: add + path: /apiServer/certSANs/- + value: MY_PUBLIC_IP + - op: add + path: /apiServer/certSANs/- + value: MY_PUBLIC_HOSTNAME diff --git a/terraform/files/setup.sh b/terraform/files/setup.sh new file mode 100644 index 0000000..4deddbd --- /dev/null +++ b/terraform/files/setup.sh @@ -0,0 +1,19 @@ +#!/bin/bash -eu +export XDG_RUNTIME_DIR=/run/user/1000 + +/usr/bin/dockerd-rootless-setuptool.sh install -f + +MY_PUBLIC_IP=$(curl ifconfig.io 2> /dev/null) +export MY_PUBLIC_IP=${MY_PUBLIC_IP} +MY_PUBLIC_HOSTNAME=$(host "${MY_PUBLIC_IP}" | rev | cut -d " " -f 1 | tail -c +2 | rev) +export MY_PUBLIC_HOSTNAME=${MY_PUBLIC_HOSTNAME} +sed -e "s/MY_PUBLIC_IP/${MY_PUBLIC_IP}/" /etc/hpcs/hpcs-cluster.yaml > "${HOME}/hpcs-cluster.yaml" +sed -i -e "s/MY_PUBLIC_HOSTNAME/${MY_PUBLIC_HOSTNAME}/" "${HOME}/hpcs-cluster.yaml" +/usr/bin/kind create cluster --config "${HOME}/hpcs-cluster.yaml" + +yq --yaml-output ".clusters[0].cluster.server = \"https://${MY_PUBLIC_HOSTNAME}:6444\"" 
"${HOME}/.kube/config" > "${HOME}/.kube/remote-config" + +# Install dashboard +helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/ +helm upgrade --install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard --create-namespace --namespace kubernetes-dashboard +kubectl apply -f /etc/hpcs/admin-user.yaml diff --git a/terraform/secrets/ssh_host_ed25519_key b/terraform/secrets/ssh_host_ed25519_key new file mode 100644 index 0000000..0b4f1ca Binary files /dev/null and b/terraform/secrets/ssh_host_ed25519_key differ diff --git a/terraform/secrets/ssh_host_ed25519_key.pub b/terraform/secrets/ssh_host_ed25519_key.pub new file mode 100644 index 0000000..23deb23 Binary files /dev/null and b/terraform/secrets/ssh_host_ed25519_key.pub differ diff --git a/terraform/server.tf b/terraform/server.tf new file mode 100644 index 0000000..5db6c17 --- /dev/null +++ b/terraform/server.tf @@ -0,0 +1,318 @@ +# MIT License +# +# Copyright (c) 2024 CSC - IT Center for Science Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Define required providers +terraform { + required_version = ">= 1.1.5" + required_providers { + openstack = { + source = "terraform-provider-openstack/openstack" + version = "~> 1.35.0" + } + } + backend "s3" { + # Pouta/Swift specifics + region = "regionOne" + skip_region_validation = true + skip_credentials_validation = true + endpoint = "a3s.fi" + } +} + +variable "instance_name" { + # set with set-name.sh + type = string + default = "hpcs.main" +} + +locals { + # Configuration of the instance + cloud_init = templatefile( + "cloud-config.yaml", + { + kind_dashboard_admin_yaml = base64encode(file("files/admin-user.yaml")), + setup_sha512 = base64encode(sha512(file("files/setup.sh"))), + ed25519_private = indent(4, file("secrets/ssh_host_ed25519_key")), + ed25519_public = file("secrets/ssh_host_ed25519_key.pub"), + hpcs_cluster_yaml = base64encode(file("files/hpcs-cluster.yaml")), + public_keys = setsubtract(split("\n", trim(file("secrets/public_keys"), "\n")), [""]), + tunnel_keys = setsubtract(split("\n", trim(file("secrets/tunnel_keys"), "\n")), [""]), + setup_sh = base64encode(file("files/setup.sh")), + }) +} + +# The actual VM is defined here +resource "openstack_compute_instance_v2" "instance" { + name = var.instance_name + image_name = "Ubuntu-24.04" + flavor_name = "standard.small" + user_data = local.cloud_init + security_groups = [ + openstack_networking_secgroup_v2.security_group.name, + ] + network { + uuid = openstack_networking_network_v2.instance_net.id + } + # Pouta API refuses to create the instance unless the subnet is ready to go + depends_on = [ + openstack_networking_subnet_v2.instance_subnet, + ] + lifecycle { + ignore_changes = [ + image_name, + ] + } +} + +# Network for 
the VM to be in. It is not allowed in most cases to have +# VMs directly in the public network on Pouta +resource "openstack_networking_network_v2" "instance_net" { + name = "${var.instance_name}-net" + admin_state_up = "true" +} + +# A router to attach the network defined earlier to the public network +resource "openstack_networking_router_v2" "router" { + name = "${var.instance_name}-router" + admin_state_up = "true" + # Magic UUID is the UUID of our public network, somewhat difficult + # to refer to it by name here so we are stuck with the magic thing + # for now + external_network_id = "26f9344a-2e81-4ef5-a018-7d20cff891ee" +} + +# Attachment of the router to the VM subnet +resource "openstack_networking_router_interface_v2" "interface" { + router_id = openstack_networking_router_v2.router.id + subnet_id = openstack_networking_subnet_v2.instance_subnet.id +} + +# The floating ip, which will be a public IP used to access the VM +resource "openstack_networking_floatingip_v2" "ip" { + pool = "public" + depends_on = [openstack_networking_router_interface_v2.interface] + lifecycle { + # Protect the public IP + prevent_destroy = true + } +} + +# Attachment of the IP to the instance. It is important to realize why +# this is separate from the floating ip it self. It is separate, so +# one can redeploy an instance and attach the IP to the new instance +# without the need to do anything about the IP object itself. +resource "openstack_compute_floatingip_associate_v2" "ip_attach" { + floating_ip = openstack_networking_floatingip_v2.ip.address + instance_id = openstack_compute_instance_v2.instance.id +} + +# Volume to store some data that we want to preserve during re-deployments +resource "openstack_blockstorage_volume_v3" "data" { + name = "${var.instance_name}-data" + size = 10 + lifecycle { + # Do not destroy the volume... ever... 
+ prevent_destroy = true + } +} + +resource "openstack_blockstorage_volume_v3" "docker" { + name = "${var.instance_name}-docker" + size = 20 + lifecycle { + # Do not destroy the volume... ever... + prevent_destroy = true + } +} + +# Similarly to the floating ip case, we need an attachment of the volume +# defined above +resource "openstack_compute_volume_attach_v2" "data" { + instance_id = openstack_compute_instance_v2.instance.id + volume_id = openstack_blockstorage_volume_v3.data.id +} + +resource "openstack_compute_volume_attach_v2" "docker" { + instance_id = openstack_compute_instance_v2.instance.id + volume_id = openstack_blockstorage_volume_v3.docker.id +} + +####################################################################### +# Security group and its rules +####################################################################### +resource "openstack_networking_secgroup_v2" "security_group" { + name = var.instance_name +} + +resource "openstack_networking_secgroup_rule_v2" "ssh-in-staff-vpn" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = "193.166.85.0/24" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh-in-espoo-office-00" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = "193.166.1.0/24" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh-in-espoo-office-01" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = "193.166.2.0/24" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh-in-espoo-office-02" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + 
port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = "193.166.80.0/23" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh-in-kajaani-office-00" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = "193.166.86.0/24" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh-in-pa-vpn-00" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 22 + port_range_max = 22 + remote_ip_prefix = "193.166.83.0/24" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "http-out" { + direction = "egress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 80 + port_range_max = 80 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "http-in" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 80 + port_range_max = 80 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "https-out" { + direction = "egress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 443 + port_range_max = 443 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} +resource "openstack_networking_secgroup_rule_v2" "https-in" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 443 + port_range_max = 443 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} +resource "openstack_networking_secgroup_rule_v2" "k8s-api-in-pa-vpn" { + direction = "ingress" + ethertype = 
"IPv4" + protocol = "tcp" + port_range_min = 6444 + port_range_max = 6444 + remote_ip_prefix = "193.166.83.0/24" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} +resource "openstack_networking_secgroup_rule_v2" "aux-k8s-portsp-in" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 30001 + port_range_max = 30004 + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "icinga-api-in" { + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = 5665 + port_range_max = 5665 + remote_ip_prefix = "86.50.229.150/32" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} + +resource "openstack_networking_secgroup_rule_v2" "icmp-in" { + direction = "ingress" + ethertype = "IPv4" + protocol = "icmp" + remote_ip_prefix = "86.50.229.150/32" + security_group_id = openstack_networking_secgroup_v2.security_group.id +} +# Subnet for the VM. 
On Pouta all VMs need to be in subnets to boot properly +resource "openstack_networking_subnet_v2" "instance_subnet" { + name = "${var.instance_name}-subnet" + network_id = openstack_networking_network_v2.instance_net.id + cidr = "10.0.0.0/24" + ip_version = 4 + dns_nameservers = [ + "1.1.1.1", + "1.1.0.0", + ] +} + +# Handy output to get the IP address that we've got in the output +output "address" { + value = openstack_networking_floatingip_v2.ip.address +} +output "cloud-init" { + value = local.cloud_init +} diff --git a/terraform/set-name.sh b/terraform/set-name.sh new file mode 100755 index 0000000..51ad2ed --- /dev/null +++ b/terraform/set-name.sh @@ -0,0 +1,38 @@ +#!/bin/bash -eu + +usage_error() { + echo "Usage: $0 --name --project " >&2 + exit 1 +} + +if [ "${#}" -ne 4 ] ; then + usage_error +fi + +instance_name= +project_id= +while [ "${#}" -ge 1 ] && [ -n "${1}" ]; do + case "${1}" in + --name ) instance_name=$2; shift 2;; + --project ) project_id=$2; shift 2;; + -- ) shift; break;; + * ) break;; + esac +done + +dir=$(dirname "$0") +# Need to use two different files because the backend is picky about +# variables it does not know about. The two files are of different "type" +# because backend initialization doesn't automatically recognize "auto" +# variables. +backend_vars_name=tf-backend.tfvars +root_autovars_name=server-name.auto.tfvars +backend_vars_file=${dir}/${backend_vars_name} +root_autovars_file=${dir}/${root_autovars_name} + +cat < "${backend_vars_file}" +bucket = "${project_id}-terraform-state" +key = "${instance_name}.tfstate" +EOF +echo 'instance_name = "'"${instance_name}"'"' >"${root_autovars_file}" +echo 'Call: terraform init [-reconfigure] -backend-config='"${backend_vars_name}"