Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,20 @@ define build_image
# Dockerfile runs). The mount hides the base image's default repos.
# Konflux buildah-oci-ta task mounts YUM_REPOS_D_FETCHED at YUM_REPOS_D_TARGET (/etc/yum.repos.d).
# See https://github.com/konflux-ci/build-definitions/blob/main/task/buildah-oci-ta/
$(eval CACHI2_VOLUME := $(if $(and $(wildcard cachi2/output),$(wildcard $(BUILD_DIR)prefetch-input)),\
--volume $(ROOT_DIR)cachi2/output:/cachi2/output:Z \
--volume $(ROOT_DIR)cachi2/output/deps/rpm/$(RPM_ARCH)/repos.d/:/etc/yum.repos.d/:Z,))
$(eval COMPONENT_DIR_STR := $(patsubst %/,%,$(BUILD_DIR)))
$(eval CACHI2_HASH := $(shell python3 -c "import hashlib; print(hashlib.md5('$(COMPONENT_DIR_STR)'.encode()).hexdigest())"))
$(eval CACHI2_DIR := cachi2/output/$(CACHI2_HASH))
$(eval CACHI2_VOLUME := $(if $(and $(wildcard $(CACHI2_DIR)),$(wildcard $(BUILD_DIR)prefetch-input)),\
--volume $(ROOT_DIR)$(CACHI2_DIR):/cachi2/output:Z \
--volume $(ROOT_DIR)$(CACHI2_DIR)/deps/rpm/$(RPM_ARCH)/repos.d/:/etc/yum.repos.d/:Z,))
$(info # Building $(IMAGE_NAME) using $(DOCKERFILE_NAME) with $(CONF_FILE) and $(BUILD_ARGS)...)

@if [ -d '$(BUILD_DIR)prefetch-input' ] && [ ! -d cachi2/output ]; then \
echo "Prefetch required for hermetic build. Run: scripts/lockfile-generators/prefetch-all.sh --component-dir $(patsubst %/,%,$(BUILD_DIR)) -- see scripts/lockfile-generators/README.md"; \
@if [ -d '$(BUILD_DIR)prefetch-input' ] && [ ! -d '$(CACHI2_DIR)' ]; then \
echo "Prefetch required for hermetic build. Run: scripts/lockfile-generators/prefetch-all.sh --component-dir $(COMPONENT_DIR_STR) -- see scripts/lockfile-generators/README.md"; \
exit 1; \
fi
@if [ -d cachi2/output ] && [ -d '$(BUILD_DIR)prefetch-input' ] && [ ! -d 'cachi2/output/deps/rpm/$(RPM_ARCH)/repos.d' ]; then \
echo "Missing RPM repos for $(RPM_ARCH). Re-run: scripts/lockfile-generators/prefetch-all.sh --component-dir $(patsubst %/,%,$(BUILD_DIR))"; \
@if [ -d '$(CACHI2_DIR)' ] && [ -d '$(BUILD_DIR)prefetch-input' ] && [ ! -d '$(CACHI2_DIR)/deps/rpm/$(RPM_ARCH)/repos.d' ]; then \
echo "Missing RPM repos for $(RPM_ARCH). Re-run: scripts/lockfile-generators/prefetch-all.sh --component-dir $(COMPONENT_DIR_STR)"; \
exit 1; \
fi
$(ROOT_DIR)/scripts/sandbox.py --dockerfile '$(2)' --platform '$(BUILD_ARCH)' -- \
Expand Down
11 changes: 7 additions & 4 deletions docs/hermetic-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Benefits:
┌──────────────────────────────────────────────────────────────────┐
│ Prefetch (before podman build) │
│ │
│ Local/GHA: prefetch-all.sh → cachi2/output/deps/
│ Local/GHA: prefetch-all.sh → cachi2/output/<hash>/deps/ │
│ Konflux: prefetch-dependencies Tekton task │
└──────────────────┬───────────────────────────────────────────────┘
Expand Down Expand Up @@ -216,9 +216,12 @@ detailed usage.
The `build_image` macro was updated to auto-detect hermetic builds:

```makefile
$(eval CACHI2_VOLUME := $(if $(and $(wildcard cachi2/output),$(wildcard $(BUILD_DIR)prefetch-input)),\
--volume $(ROOT_DIR)cachi2/output:/cachi2/output:Z \
--volume $(ROOT_DIR)cachi2/output/deps/rpm/$(RPM_ARCH)/repos.d/:/etc/yum.repos.d/:Z,))
$(eval COMPONENT_DIR_STR := $(patsubst %/,%,$(BUILD_DIR)))
$(eval CACHI2_HASH := $(shell python3 -c "import hashlib; print(hashlib.md5('$(COMPONENT_DIR_STR)'.encode()).hexdigest())"))
$(eval CACHI2_DIR := cachi2/output/$(CACHI2_HASH))
$(eval CACHI2_VOLUME := $(if $(and $(wildcard $(CACHI2_DIR)),$(wildcard $(BUILD_DIR)prefetch-input)),\
--volume $(ROOT_DIR)$(CACHI2_DIR):/cachi2/output:Z \
--volume $(ROOT_DIR)$(CACHI2_DIR)/deps/rpm/$(RPM_ARCH)/repos.d/:/etc/yum.repos.d/:Z,))
```

This evaluates per-target: only targets with both `cachi2/output/<hash>/` and a
Expand Down
182 changes: 182 additions & 0 deletions docs/learnings/hermetic-build-architecture_.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Hermetic Build Architecture for Codeserver

## Overview

The codeserver workbench (`codeserver/ubi9-python-3.12`) uses a fully hermetic build
where all dependencies (RPMs, npm packages, Python wheels, generic tarballs) are
prefetched before the Docker build runs. The build operates without network access.

## Build Chain

```text
prefetch-all.sh → populates cachi2/output/<hash>/deps/
Makefile → detects cachi2/output/<hash>/ → injects --volume into podman build
sandbox.py → creates minimal build context from Dockerfile COPY/ADD directives
podman build → runs Dockerfile with /cachi2/output/ mounted
```

### prefetch-all.sh

Orchestrates five lockfile generators in sequence:

| Step | Generator | Input | Output |
|------|-----------|-------|--------|
| 1 | `create-artifact-lockfile.py` | `artifacts.in.yaml` | `cachi2/output/<hash>/deps/generic/` (GPG keys, nfpm, node headers, oc client, VS Code extensions) |
| 2 | `create-requirements-lockfile.sh` | `pyproject.toml` | `cachi2/output/<hash>/deps/pip/` (Python wheels) |
| 3 | `download-npm.sh` | `package-lock.json` files | `cachi2/output/<hash>/deps/npm/` (npm tarballs) |
| 4 | `hermeto-fetch-rpm.sh` | `rpms.lock.yaml` | `cachi2/output/<hash>/deps/rpm/{arch}/` (RPMs + repo metadata) |
| 5 | `create-go-lockfile.sh` | `go.mod` (via git submodule) | `cachi2/output/<hash>/deps/gomod/` (Go modules) |

Variants are selected via the `--rhds` flag:
- Default (`odh`): uses CentOS Stream + UBI repos (no subscription needed)
- `--rhds`: uses RHEL subscription repos (needs `--activation-key` and `--org`)

### Makefile auto-detection

```makefile
$(eval COMPONENT_DIR_STR := $(patsubst %/,%,$(BUILD_DIR)))
$(eval CACHI2_HASH := $(shell python3 -c "import hashlib; print(hashlib.md5('$(COMPONENT_DIR_STR)'.encode()).hexdigest())"))
$(eval CACHI2_DIR := cachi2/output/$(CACHI2_HASH))
$(eval CACHI2_VOLUME := $(if $(and $(wildcard $(CACHI2_DIR)),$(wildcard $(BUILD_DIR)prefetch-input)),\
--volume $(ROOT_DIR)$(CACHI2_DIR):/cachi2/output:Z \
--volume $(ROOT_DIR)$(CACHI2_DIR)/deps/rpm/$(RPM_ARCH)/repos.d/:/etc/yum.repos.d/:Z,))
```

When both `cachi2/output/<hash>/` and `<target>/prefetch-input/` exist, the Makefile
automatically mounts the per-component prefetched dependencies into the build.
The `<hash>` is the MD5 hex digest of the component directory path with any
trailing slash removed (e.g. `codeserver/ubi9-python-3.12`), which allows
concurrent builds of different components without collisions. The second mount
overlays `/etc/yum.repos.d/` with hermeto-generated repos, making local builds
behave like Konflux (repos are already in place when the Dockerfile runs).

### sandbox.py

Wraps `podman build` by creating a minimal build context:
1. Parses the Dockerfile using `bin/buildinputs` (Go tool, Dockerfile → LLB → JSON)
2. Identifies all files referenced in COPY/ADD directives
3. Creates a temporary directory with only those files
4. Passes `{}` placeholder to podman which gets replaced with the tmpdir path

sandbox.py does NOT modify volumes, build args, or repos — it only manages the
build context.

## cachi2/output Directory Structure

After prefetching, each component gets its own namespaced directory under
`cachi2/output/<hash>/`, where `<hash>` is the MD5 hex digest of the component
directory path (so the directory looks like `cachi2/output/a1b2c3.../`). This
prevents collisions when building multiple components in parallel.

```text
cachi2/output/
└── <hash>/ # per-component namespace (MD5 of component dir)
├── deps/
│ ├── rpm/
│ │ ├── x86_64/
│ │ │ ├── <repo-name>/ # RPM files + repodata/
│ │ │ └── repos.d/ # Generated .repo files with file:// URLs
│ │ ├── aarch64/
│ │ ├── ppc64le/
│ │ └── s390x/
│ ├── npm/ # npm tarballs
│ ├── pip/ # Python wheels
│ └── generic/ # GPG keys, tarballs, etc.
├── bom.json
└── .build-config.json
```

The Makefile mounts `cachi2/output/<hash>/` at `/cachi2/output/` inside the
container, so the Dockerfile always sees `/cachi2/output/deps/...` regardless
of which component is being built.

Key detail: when `rpms.in.yaml` declares `moduleEnable: [nodejs:22]`, hermeto
downloads module metadata (`modules.yaml`) alongside the RPMs and includes it
in the generated repodata. This allows `dnf module enable nodejs:22` to work
with the hermeto repos. Both our `hermeto-fetch-rpm.sh` wrapper and Konflux's
`prefetch-dependencies-oci-ta` task produce repos with this metadata.

## Three Build Environments

### Local development

```bash
scripts/lockfile-generators/prefetch-all.sh --component-dir codeserver/ubi9-python-3.12
make codeserver-ubi9-python-3.12
```

The Makefile detects `cachi2/output/<hash>/` and auto-injects the volume mounts.

Comment thread
coderabbitai[bot] marked this conversation as resolved.
### GitHub Actions

The TEMPLATE workflow (`build-notebooks-TEMPLATE.yaml`) handles it transparently:

1. **Prefetch step**: runs `prefetch-all.sh`, outputs `EXTRA_BUILD_ARGS` with
volume mount
2. **Build step**: runs `make` with `CONTAINER_BUILD_CACHE_ARGS` containing
the volume mount
3. For subscription builds (AIPCC), passes `--rhds --activation-key ... --org ...`
to use the RHDS variant lockfiles

### Konflux (Tekton)

1. PipelineRun YAML declares `prefetch-input` entries pointing to lockfiles
2. cachi2's `prefetch-dependencies` task downloads everything using hermeto
3. Build task mounts deps at `/cachi2/output/` automatically
4. Network isolation enforced at the pipeline level

All three environments produce the same `/cachi2/output/deps/` layout inside
the container because they all use hermeto under the hood for RPM prefetching.
On the host, local/GHA builds use `cachi2/output/<hash>/deps/` while Konflux
uses its own staging directory.

## Variant Directories (ODH vs RHDS)

Lockfiles are organized into two variant directories under `prefetch-input/`:

```text
prefetch-input/
├── odh/ # upstream (CentOS Stream + UBI repos)
│ ├── rpms.in.yaml
│ ├── rpms.lock.yaml
│ ├── artifacts.in.yaml
│ └── artifacts.lock.yaml
├── rhds/ # downstream (RHEL subscription repos)
│ ├── rpms.in.yaml
│ ├── rpms.lock.yaml
│ ├── artifacts.in.yaml
│ └── artifacts.lock.yaml
├── repos/ # shared DNF repo definitions
├── code-server/ # git submodule (vendored source)
└── patches/ # build patches for offline operation
```

ODH uses CentOS Stream packages; RHDS uses RHEL packages. The choice matters
because the base images differ: ODH uses a CentOS Stream 9 (c9s) base, while
AIPCC (the subscription builds that use the RHDS lockfiles) uses a RHEL base.
Mixing variants causes RPM conflicts (see openssl-fips-provider-conflict.md).

## Dockerfile Structure

The Dockerfile is multi-stage with 5 stages:

| Stage | Purpose |
|-------|---------|
| `rpm-base` | Builds code-server from source into an RPM |
| `whl-cache` | Installs Python wheels, exports compiled C-extension wheels for ppc64le/s390x |
| `cpu-base` | Installs OS packages + tools (oc client, micropipenv, uv) |
| `codeserver` | Final image (code-server + nginx + Python packages) |
| `tests` | Smoke test stage |

Each stage that runs `dnf install` needs repos configured. Repos are injected
by the infrastructure, not by the Dockerfile:

- **Local/GHA**: The Makefile volume-mounts `repos.d/` at `/etc/yum.repos.d/`,
overlaying the base image's default repos.
- **Konflux**: The `buildah-oci-ta` task volume-mounts `YUM_REPOS_D_FETCHED`
at `/etc/yum.repos.d/` in the same way.

Both environments replace the base image's default repos. For targets that
need nodejs (codeserver), `rpms.in.yaml` declares `moduleEnable: [nodejs:22]`,
which makes hermeto include module metadata in the repodata. The Dockerfile
runs `dnf module enable nodejs:22 -y` to activate the module stream.

As a result, the Dockerfile needs no `LOCAL_BUILD` build arg, no if/else
branching, and no `rm -f` or `cp` of repos.
9 changes: 5 additions & 4 deletions scripts/lockfile-generators/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ All scripts must be run from the **repository root**.
**For most local and CI use, this is the main script you need to run.**

`prefetch-all.sh` orchestrates all five lockfile generators in the correct
order, downloading dependencies into `cachi2/output/deps/`. After running it,
the Makefile auto-detects `cachi2/output/` and passes `--volume` to
order, downloading dependencies into `cachi2/output/<hash>/deps/` (where `<hash>` is the MD5 hash of the component directory name to allow concurrent local builds). After running it,
the Makefile auto-detects the component's `cachi2/output/<hash>` directory and passes `--volume` to
`podman build`.

```bash
# Upstream ODH (default variant, CentOS Stream base, no subscription):
scripts/lockfile-generators/prefetch-all.sh \
--component-dir codeserver/ubi9-python-3.12
--component-dir codeserver/ubi9-python-3.12 --arch aarch64

# Downstream RHDS (with RHEL subscription for cdn.redhat.com RPMs):
scripts/lockfile-generators/prefetch-all.sh \
Expand All @@ -78,6 +78,7 @@ gmake codeserver-ubi9-python-3.12 BUILD_ARCH=linux/arm64 PUSH_IMAGES=no
| `--component-dir DIR` | Component directory (required), e.g. `codeserver/ubi9-python-3.12` |
| `--rhds` | Use downstream (RHDS) lockfiles instead of upstream (ODH, the default) |
| `--flavor NAME` | Lock file flavor (default: `cpu`) |
| `--arch ARCH` | Target architecture to filter downloads (default: host architecture) |
| `--activation-key KEY` | Red Hat activation key for RHEL RPMs (optional) |
| `--org ORG` | Red Hat organization ID for RHEL RPMs (optional) |

Expand Down Expand Up @@ -152,7 +153,7 @@ internally. Option 6 (Git submodule) is a manual setup.
| Helper | Used by | Purpose |
|--------|---------|---------|
| `helpers/pylock-to-requirements.py` | pip | Convert `pylock.<flavor>.toml` (PEP 751) to pip-compatible `requirements.<flavor>.txt` with `--hash` lines. |
| `helpers/download-pip-packages.py` | pip | Standalone pip downloader: downloads wheels/sdists from a `requirements.txt` (with `--hash` lines) into `cachi2/output/deps/pip/`. Not called by `create-requirements-lockfile.sh` (which has its own inline download from pylock.toml). |
| `helpers/download-pip-packages.py` | pip | Pip downloader: downloads wheels/sdists from a `requirements.txt` (with `--hash` lines) into `cachi2/output/<hash>/deps/pip/`. Called by `create-requirements-lockfile.sh --download`. Supports `--arch` filtering and parallel downloads. |
| `helpers/download-rpms.sh` | RPM | Download RPMs from `rpms.lock.yaml` via `wget` into `cachi2/output/deps/rpm/` and create DNF repo metadata. Standalone alternative to `hermeto-fetch-rpm.sh`. |
| `helpers/hermeto-fetch-rpm.sh` | RPM | Download RPMs from `rpms.lock.yaml` using [Hermeto](https://github.com/hermetoproject/hermeto) in a container. Handles RHEL entitlement cert extraction for `cdn.redhat.com` auth. Called by `create-rpm-lockfile.sh --download`. |
| `helpers/hermeto-fetch-npm.sh` | npm | Alternative npm fetcher using [Hermeto](https://github.com/hermetoproject/hermeto) in a container. |
Expand Down
4 changes: 2 additions & 2 deletions scripts/lockfile-generators/create-artifact-lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
import argparse
import hashlib
import subprocess
import sys
import sys, os
from pathlib import Path
from typing import Any, Optional

import yaml

# Constants
CACHE_BASE_DIR = Path("cachi2/output/deps/generic")
CACHE_BASE_DIR = Path(os.environ.get("CACHI2_OUT_DIR", "cachi2/output")) / "deps" / "generic"
METADATA_VERSION = "1.0"
CHUNK_SIZE = 8192

Expand Down
85 changes: 6 additions & 79 deletions scripts/lockfile-generators/create-requirements-lockfile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,92 +156,19 @@ if [[ "$DO_DOWNLOAD" == true ]]; then

# Output directory must match Cachi2 layout so prefetched wheels are found
# during hermetic/offline builds (e.g. Docker COPY from cachi2/output/deps/pip).
# The cachi2/output root can be overridden with CACHI2_OUT_DIR.
OUT_DIR="cachi2/output/deps/pip"
OUT_DIR="${CACHI2_OUT_DIR:-cachi2/output}/deps/pip"
mkdir -p "$OUT_DIR"

# Use sha256sum on Linux, shasum -a 256 on macOS (portable).
if command -v sha256sum &>/dev/null; then
sha256_of() { sha256sum "$1" | cut -d' ' -f1; }
else
sha256_of() { shasum -a 256 "$1" | cut -d' ' -f1; }
fi

# Count lines in pylock that look like "url = \"...\" ... sha256 = \"...\""
# (one per wheel; multi-line wheel blocks have one such line per wheel).
total=$(grep -c 'url = ".*sha256 = "' "$PYLOCK_FILE" || true)
echo " ${total} wheel(s) to download into ${OUT_DIR}/"
echo ""

idx=0
# Read one line per wheel from the lockfile (same pattern as above).
while IFS= read -r line; do
idx=$((idx + 1))

# Extract URL and expected sha256 from lockfile line (TOML-style).
url=$(echo "$line" | sed 's/.*url = "\([^"]*\)".*/\1/')
sha=$(echo "$line" | sed 's/.*sha256 = "\([^"]*\)".*/\1/')

if [[ -z "$url" || -z "$sha" ]]; then
echo " ERROR: failed to parse url or sha256 from lockfile line (wheel ${idx})" >&2
echo " line: ${line:0:120}..." >&2
exit 1
fi

# Filename is the last path segment of the URL, without query/fragment.
filename="${url##*/}"; filename="${filename%%[?#]*}"
if [[ -z "$filename" ]]; then
echo " ERROR: could not derive filename from URL (wheel ${idx})" >&2
echo " URL: ${url}" >&2
exit 1
fi
dest="${OUT_DIR}/${filename}"

echo "[${idx}/${total}] ${filename}"

# Resume partial runs: reuse a file only if its digest matches this wheel.
if [[ -f "$dest" ]]; then
actual=$(sha256_of "$dest")
if [[ "$actual" == "$sha" ]]; then
echo " Already present (checksum OK), skipping download."
else
echo " WARNING: Ignoring stale or mismatched cached wheel (digest does not match this lockfile entry)." >&2
echo " file: ${dest}" >&2
echo " got: ${actual}" >&2
echo " expected: ${sha}" >&2
echo " Removing cached file and re-downloading." >&2
rm -f "$dest"
fi
fi

if [[ ! -f "$dest" ]]; then
echo " Downloading: ${url}"
if ! wget -q -O "$dest" "$url"; then
echo " ERROR: download failed for ${filename}" >&2
echo " URL: ${url}" >&2
echo " Run 'wget -O /dev/null \"${url}\"' to see the full error." >&2
rm -f "$dest"
exit 1
fi
fi

# Verify digest so corrupted downloads are detected.
actual=$(sha256_of "$dest")
if [[ "$actual" != "$sha" ]]; then
echo " ERROR: checksum mismatch (got ${actual}, expected ${sha})" >&2
rm -f "$dest"
exit 1
fi
echo " Checksum OK (sha256:${actual:0:16}...)"
done < <(grep 'url = ".*sha256 = "' "$PYLOCK_FILE")
# Delegate to python script for parallel downloading and filtering.
python3 scripts/lockfile-generators/helpers/download-pip-packages.py \
--output-dir "$OUT_DIR" ${ARCH:+--arch "$ARCH"} "$REQUIREMENTS_FILE"

echo ""
echo "Done: ${total} file(s) present and validated in ${OUT_DIR}/"
fi

echo ""
echo "=== All done ==="
echo " pylock.toml : ${PYLOCK_FILE}"
echo " requirements : ${REQUIREMENTS_FILE}"
if [[ "$DO_DOWNLOAD" == true ]]; then
echo " wheels : cachi2/output/deps/pip/"
fi
echo " wheels : ${OUT_DIR}/"
fi
Loading
Loading