Skip to content

Commit 7f0f3ba

Browse files
authored
ci: add CI/CD build workflow with GHCR staging and Docker Hub promotion (#7)
* ci: add build-and-push workflow for official-templates Triggers on merge to main (paths: official-templates/**) and manual dispatch. Detects which template directories changed, builds only those in a parallel matrix (fail-fast: false), and pushes to Docker Hub. Features: - Change detection via git diff HEAD~1 HEAD - Per-template Docker Hub registry cache (yottalabsai/buildcache:<template>) with mode=max for full intermediate layer caching - HF_TOKEN support for templates that download models at build time - Manual dispatch with optional template name override Required secrets: DOCKERHUB_USERNAME, DOCKERHUB_TOKEN, HF_TOKEN * chore(unsloth): bump TAG_SUFFIX to 2026032501 to trigger CI * ci: disable Buildx filesystem entitlement check Bake files use ../../container-template contexts (outside working dir), which Buildx 0.13+ flags as requiring explicit --allow=fs.read. BUILDX_BAKE_ENTITLEMENTS_FS=0 opts out of the check. * revert(unsloth): restore TAG_SUFFIX to 2025122201 CI pipeline test complete; reverting temporary bump. * feat(aws-neuron): add template for Inferentia2 and Trainium - Base: ubuntu:22.04 (no NVIDIA runtime) - Python 3.11 via deadsnakes PPA (required by start.sh) - AWS Neuron APT repo: aws-neuronx-tools, runtime-lib, collectives - torch-neuronx 2.x + neuronx-cc 2.x from pip.repos.neuron.amazonaws.com - transformers-neuronx, neuronx-distributed for inf2/trn1/trn2 - HF stack (transformers, datasets, huggingface_hub, accelerate) - JupyterLab, SSH, nginx via shared start.sh * fix(aws-neuron): add /opt/aws/neuron/bin to PATH; add README neuron-ls and neuronx-cc are installed to /opt/aws/neuron/bin by aws-neuronx-tools but that directory was missing from PATH. Also adds README with host setup, run instructions, and smoke tests. * fix(aws-neuron): correct neuronx-cc path in README smoke tests neuronx-cc is pip-installed to /usr/local/bin, not /opt/aws/neuron/bin. 
* fix(ci): fall back to GITHUB_SHA for CD on workflow_dispatch PR_HEAD_SHA is empty when CD is triggered manually via workflow_dispatch, causing SHA_SHORT to be blank and skopeo to look for a non-existent tag. Fall back to GITHUB_SHA so manual CD runs work correctly. * feat(ci): add sha input to workflow_dispatch for manual CD Allows specifying the exact GHCR image SHA to promote when triggering CD manually (e.g. when HEAD has no CI-built image). Priority: sha input → PR head SHA → GITHUB_SHA (HEAD)
1 parent a8a8a4a commit 7f0f3ba

4 files changed

Lines changed: 508 additions & 0 deletions

File tree

.github/workflows/build.yml

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
name: Build and Push

# ──────────────────────────────────────────────────────────────────────────────
# CI — runs on every PR that touches official-templates/**: builds the changed
#      templates and pushes the images to GHCR for validation.
# CD — runs when such a PR is closed (the CD jobs only proceed if it was
#      merged): promotes the GHCR image to Docker Hub via skopeo copy
#      (no rebuild).
# ──────────────────────────────────────────────────────────────────────────────

on:
  # CI: validate every PR that touches a template
  pull_request:
    paths:
      - 'official-templates/**'

  # CD: promote on merge. `closed` also fires for PRs closed without merging;
  # the cd-detect job filters on `github.event.pull_request.merged`.
  pull_request_target:
    types: [closed]
    paths:
      - 'official-templates/**'

  # Manual override for both CI and CD
  workflow_dispatch:
    inputs:
      template:
        description: 'Template name to build (e.g. pytorch). Leave empty to build all changed.'
        required: false
        type: string
      mode:
        description: 'ci = build to GHCR only, cd = promote GHCR → Docker Hub'
        required: false
        default: 'ci'
        type: choice
        options: [ci, cd]
      sha:
        description: 'Full or short SHA of the GHCR image to promote (cd mode only). Defaults to HEAD.'
        required: false
        type: string

# Least-privilege default for GITHUB_TOKEN. Jobs that need more (for example
# pushing packages to GHCR) declare their own `permissions` block, which
# replaces this default for that job.
permissions:
  contents: read
38+
39+
# ──────────────────────────────────────────────────────────────────────────────
40+
# CI jobs
41+
# ──────────────────────────────────────────────────────────────────────────────
42+
jobs:
43+
# Works out which template directories need a CI build and exposes them as a
# JSON array (job output `matrix`) consumed by the ci-build matrix.
ci-detect:
  name: CI — Detect changed templates
  if: >
    github.event_name == 'pull_request' ||
    (github.event_name == 'workflow_dispatch' && inputs.mode == 'ci')
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    # Full history so both ends of the diff range are available locally.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Determine templates to build
      id: set-matrix
      env:
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # Passed through the environment rather than interpolated into the
        # script, so the input can never be parsed as shell code.
        INPUT_TEMPLATE: ${{ inputs.template }}
      run: |
        if [ -n "${INPUT_TEMPLATE}" ]; then
          # Manual override: fail early on a typo instead of in the build job.
          if [ ! -f "official-templates/${INPUT_TEMPLATE}/docker-bake.hcl" ]; then
            echo "::error::No docker-bake.hcl found for template '${INPUT_TEMPLATE}'"
            exit 1
          fi
          # jq handles JSON escaping of the name.
          TEMPLATES=$(jq -c -n --arg name "${INPUT_TEMPLATE}" '[$name]')
        else
          # The PR SHAs are empty on workflow_dispatch; fall back to the last
          # commit of the dispatched ref so "leave empty" still detects changes.
          BASE_SHA="${BASE_SHA:-HEAD~1}"
          HEAD_SHA="${HEAD_SHA:-HEAD}"

          # Changed paths → second path component (template dir) → unique →
          # keep only directories that actually ship a bake file → JSON array.
          TEMPLATES=$(git diff --name-only "${BASE_SHA}" "${HEAD_SHA}" \
            | grep '^official-templates/' \
            | awk -F'/' '{print $2}' \
            | sort -u \
            | grep -v '^\s*$' \
            | while read -r dir; do
                if [ -f "official-templates/$dir/docker-bake.hcl" ]; then
                  echo "$dir"
                fi
              done \
            | jq -R -s -c 'split("\n") | map(select(length > 0))')
        fi
        echo "matrix=${TEMPLATES}" >> "$GITHUB_OUTPUT"
        echo "Templates to build: ${TEMPLATES}"
77+
78+
# Builds every detected template with `docker buildx bake` and pushes the
# result to GHCR, one matrix job per template.
ci-build:
  name: CI — Build ${{ matrix.template }}
  needs: ci-detect
  if: needs.ci-detect.outputs.matrix != '[]' && needs.ci-detect.outputs.matrix != ''
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: false
    matrix:
      template: ${{ fromJson(needs.ci-detect.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v4

    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to GHCR
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # The build below both reads (cache-from) and writes (cache-to) the
    # Docker Hub cache repository, so this login needs push access to it.
    - name: Log in to Docker Hub (for registry cache read)
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Build and push to GHCR
      working-directory: official-templates/${{ matrix.template }}
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        TEMPLATE: ${{ matrix.template }}
        # On pull_request events GITHUB_SHA is the temporary merge commit,
        # while the CD job looks images up by the PR head SHA. Tag with the
        # head SHA when there is one so promotion can find the image.
        IMAGE_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        BUILDX_BAKE_ENTITLEMENTS_FS: "0"
      run: |
        set -euo pipefail

        SHA_SHORT="${IMAGE_SHA::7}"
        CACHE_REF="yottalabsai/buildcache:${TEMPLATE}"

        # Resolve the bake targets. Abort if that fails: building without
        # tag overrides would push the tags declared in the bake file.
        if ! BAKE_JSON=$(docker buildx bake --print 2>/dev/null); then
          echo "::error::docker buildx bake --print failed for ${TEMPLATE}"
          exit 1
        fi
        mapfile -t TARGETS < <(jq -r '.target | keys[]' <<<"${BAKE_JSON}")
        if [ "${#TARGETS[@]}" -eq 0 ]; then
          echo "::error::No bake targets resolved for ${TEMPLATE}"
          exit 1
        fi

        # Override each bake target's tags to GHCR.
        # Tag scheme: ghcr.io/yottalabsai/<template>:<target>-sha-<sha>
        # Using per-target tags handles multi-target bake files (e.g. base has 8 targets).
        BAKE_ARGS=()
        for target in "${TARGETS[@]}"; do
          BAKE_ARGS+=(--set "${target}.tags=ghcr.io/yottalabsai/${TEMPLATE}:${target}-sha-${SHA_SHORT}")
        done

        # Arguments are passed as an array (no eval), so the secret is never
        # re-parsed by the shell.
        # NOTE(review): build args can end up in image metadata; consider a
        # BuildKit secret mount for HF_TOKEN if the Dockerfiles allow it.
        docker buildx bake "${BAKE_ARGS[@]}" \
          --set "*.cache-from=type=registry,ref=${CACHE_REF}" \
          --set "*.cache-to=type=registry,ref=${CACHE_REF},mode=max" \
          --set "*.args.HF_TOKEN=${HF_TOKEN:-}" \
          --push
135+
136+
# ──────────────────────────────────────────────────────────────────────────────
137+
# CD jobs
138+
# ──────────────────────────────────────────────────────────────────────────────
139+
# Works out which template directories a merged PR (or a manual CD run)
# touched and exposes them as a JSON array (job output `matrix`) consumed by
# the cd-promote matrix.
cd-detect:
  name: CD — Detect merged templates
  if: >
    (github.event_name == 'pull_request_target' && github.event.pull_request.merged == true) ||
    (github.event_name == 'workflow_dispatch' && inputs.mode == 'cd')
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    # Full history so both ends of the diff range are available locally.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Determine templates to promote
      id: set-matrix
      env:
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # Passed through the environment rather than interpolated into the
        # script, so the inputs can never be parsed as shell code.
        INPUT_TEMPLATE: ${{ inputs.template }}
        INPUT_SHA: ${{ inputs.sha }}
      run: |
        if [ -n "${INPUT_TEMPLATE}" ]; then
          # jq handles JSON escaping of the name.
          TEMPLATES=$(jq -c -n --arg name "${INPUT_TEMPLATE}" '[$name]')
        else
          # The PR SHAs are empty on workflow_dispatch; fall back to the
          # requested sha (or HEAD) and diff it against its first parent.
          # NOTE(review): this only sees the last commit of a multi-commit
          # change; pass `template` explicitly when that is not enough.
          HEAD_SHA="${HEAD_SHA:-${INPUT_SHA:-HEAD}}"
          BASE_SHA="${BASE_SHA:-${HEAD_SHA}~1}"

          # Changed paths → second path component (template dir) → unique →
          # keep only directories that actually ship a bake file → JSON array.
          TEMPLATES=$(git diff --name-only "${BASE_SHA}" "${HEAD_SHA}" \
            | grep '^official-templates/' \
            | awk -F'/' '{print $2}' \
            | sort -u \
            | grep -v '^\s*$' \
            | while read -r dir; do
                if [ -f "official-templates/$dir/docker-bake.hcl" ]; then
                  echo "$dir"
                fi
              done \
            | jq -R -s -c 'split("\n") | map(select(length > 0))')
        fi
        echo "matrix=${TEMPLATES}" >> "$GITHUB_OUTPUT"
        echo "Templates to promote: ${TEMPLATES}"
173+
174+
# Copies the images CI pushed to GHCR over to the Docker Hub tags declared in
# each template's bake file, one matrix job per template. Nothing is rebuilt.
cd-promote:
  name: CD — Promote ${{ matrix.template }} → Docker Hub
  needs: cd-detect
  if: needs.cd-detect.outputs.matrix != '[]' && needs.cd-detect.outputs.matrix != ''
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: read
  strategy:
    fail-fast: false
    matrix:
      template: ${{ fromJson(needs.cd-detect.outputs.matrix) }}
  steps:
    # Full history: the step below checks a file out of an arbitrary commit,
    # which a depth-1 clone usually does not contain.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Promote GHCR → Docker Hub
      env:
        TEMPLATE: ${{ matrix.template }}
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # Passed through the environment rather than interpolated into the
        # script, so the input can never be parsed as shell code.
        INPUT_SHA: ${{ inputs.sha }}
        DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
        DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        set -euo pipefail

        # Priority: manual sha input → PR head → current HEAD
        RESOLVE_SHA="${INPUT_SHA:-${PR_HEAD_SHA:-${GITHUB_SHA}}}"
        SHA_SHORT="${RESOLVE_SHA::7}"

        # A PR head commit is not on the base branch after a squash or
        # rebase merge, so fetch it explicitly when it is missing locally.
        if ! git cat-file -e "${RESOLVE_SHA}^{commit}" 2>/dev/null; then
          git fetch --no-tags origin "${RESOLVE_SHA}"
        fi

        # Use the bake file at the exact commit so target names and
        # Docker Hub tags match what CI built.
        git checkout "${RESOLVE_SHA}" -- "official-templates/${TEMPLATE}/docker-bake.hcl"

        cd "official-templates/${TEMPLATE}"
        if ! BAKE_JSON=$(docker buildx bake --print 2>/dev/null); then
          echo "::error::docker buildx bake --print failed for ${TEMPLATE}"
          exit 1
        fi

        # For each bake target, copy its GHCR image to every Docker Hub tag
        # defined in the bake file (targets without tags are skipped).
        # skopeo is pre-installed on ubuntu-latest.
        promoted=0
        while IFS=' ' read -r target dh_tag; do
          src="docker://ghcr.io/yottalabsai/${TEMPLATE}:${target}-sha-${SHA_SHORT}"
          dst="docker://${dh_tag}"
          echo "Promoting ${src} → ${dst}"
          skopeo copy \
            --src-creds "x-access-token:${GITHUB_TOKEN}" \
            --dest-creds "${DOCKERHUB_USERNAME}:${DOCKERHUB_TOKEN}" \
            "${src}" "${dst}"
          promoted=$((promoted + 1))
        done < <(jq -r '.target | to_entries[] | "\(.key) \((.value.tags // [])[])"' <<<"${BAKE_JSON}")

        # A green job that copied nothing would hide a broken promotion.
        if [ "${promoted}" -eq 0 ]; then
          echo "::error::No images were promoted for ${TEMPLATE}"
          exit 1
        fi
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# Base image is overridable at build time; the default has no NVIDIA runtime.
ARG BASE_IMAGE="ubuntu:22.04"
FROM ${BASE_IMAGE}

# Run every RUN step under bash with pipefail so a failing producer in a
# pipeline (e.g. `curl ... | python`) fails the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# -------------------------------
# Build args: version specifiers for the Neuron pip packages installed later
# -------------------------------
ARG TORCH_NEURONX_VERSION="2.*"
ARG NEURONX_CC_VERSION="2.*"

# -------------------------------
# Environment
# -------------------------------
ENV DEBIAN_FRONTEND=noninteractive
ENV SHELL=/bin/bash
# Neuron CLI tools from the APT packages live in /opt/aws/neuron/bin.
ENV PATH=/opt/aws/neuron/bin:/usr/local/bin:/usr/bin:/bin:${PATH}
ENV NEURON_RT_NUM_CORES=1
ENV HF_HOME=/workspace/hf
ENV JUPYTER_PASSWORD=ubuntu

# -------------------------------
# Workspace: world-writable directory at /workspace
# -------------------------------
WORKDIR /
RUN mkdir -p /workspace \
 && chmod 777 /workspace
27+
28+
# -------------------------------
# Base system packages (one per line, alphabetical), UTF-8 locale and the
# sshd runtime directory. APT lists are removed to keep the layer small.
# -------------------------------
RUN apt-get update -y \
 && apt-get install -y --no-install-recommends \
      bash \
      build-essential \
      ca-certificates \
      curl \
      git \
      gnupg2 \
      htop \
      jq \
      less \
      locales \
      net-tools \
      netcat-openbsd \
      nginx \
      openssh-server \
      pkg-config \
      procps \
      rsync \
      software-properties-common \
      sudo \
      tmux \
      tree \
      tzdata \
      unzip \
      vim \
      wget \
      zip \
 && echo "en_US.UTF-8 UTF-8" > /etc/locale.gen \
 && locale-gen \
 && mkdir -p /var/run/sshd \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# -------------------------------
# Python 3.11 from the deadsnakes PPA
# (start.sh requires python3.11)
# -------------------------------
RUN add-apt-repository -y ppa:deadsnakes/ppa \
 && apt-get update -y \
 && apt-get install -y --no-install-recommends \
      python3.11 \
      python3.11-dev \
      python3.11-venv \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Bootstrap pip for python3.11, then bring the packaging tools up to date.
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 \
 && python3.11 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# Make `python3` and `python` resolve to python3.11 via /usr/local/bin.
RUN for alias in python3 python; do \
      ln -sf /usr/bin/python3.11 "/usr/local/bin/${alias}"; \
    done

# -------------------------------
# AWS Neuron APT repo + runtime
# (runtime-lib + collectives cover inf2 and trn1/trn2; dkms is host-side only)
# -------------------------------
RUN wget -qO- https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
      | gpg --dearmor -o /etc/apt/trusted.gpg.d/neuron.gpg \
 && echo "deb https://apt.repos.neuron.amazonaws.com jammy main" \
      > /etc/apt/sources.list.d/neuron.list \
 && apt-get update -y \
 && apt-get install -y --no-install-recommends \
      aws-neuronx-collectives \
      aws-neuronx-runtime-lib \
      aws-neuronx-tools \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
78+
79+
# -------------------------------
# torch-neuronx (inf2 + trn1/trn2), resolved against the Neuron pip index
# -------------------------------
RUN python3.11 -m pip install --no-cache-dir \
      --extra-index-url https://pip.repos.neuron.amazonaws.com \
      "torch-neuronx==${TORCH_NEURONX_VERSION}" \
      torchvision \
      torchaudio

# -------------------------------
# Neuron compiler + distributed training + inference optimisation
# -------------------------------
RUN python3.11 -m pip install --no-cache-dir \
      --extra-index-url https://pip.repos.neuron.amazonaws.com \
      "neuronx-cc==${NEURONX_CC_VERSION}" \
      transformers-neuronx \
      neuronx-distributed

# -------------------------------
# Hugging Face stack + JupyterLab
# -------------------------------
RUN python3.11 -m pip install --no-cache-dir \
      transformers \
      datasets \
      huggingface_hub \
      accelerate \
      jupyterlab \
      ipywidgets \
      jupyter-archive \
      "notebook==7.3.3"

# Build-time smoke test: the build fails here if the Jupyter packages cannot
# be imported, so a broken image is never pushed.
RUN python3.11 -c "import jupyter; import jupyterlab; print('python3.11 jupyter ok')"
104+
105+
# ===============================
# User
# ===============================
# Passwordless-sudo `ubuntu` user with a default password.
# - useradd is skipped when the base image already ships an `ubuntu` user,
#   so overriding BASE_IMAGE does not break the build.
# - sudoers drop-ins are conventionally mode 0440.
# NOTE(review): the default password is baked into the image; confirm that
# start.sh or the deployment rotates it.
RUN (id -u ubuntu >/dev/null 2>&1 || useradd -ms /bin/bash ubuntu) && \
    usermod -aG sudo ubuntu && \
    echo "ubuntu ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/ubuntu && \
    chmod 0440 /etc/sudoers.d/ubuntu && \
    echo "ubuntu:ubuntu" | chpasswd

# ===============================
# SSH config (start.sh handles sshd startup)
# ===============================
# The stock sshd_config ships PermitRootLogin commented out
# ("#PermitRootLogin prohibit-password"), so a plain substring replacement
# leaves the directive commented and has no effect. Match the whole line,
# commented or not, and rewrite it as an active directive.
# Host keys are removed so they are not shared between containers.
# NOTE(review): this enables root login over SSH as the original intended;
# confirm that is still desired.
RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config && \
    sed -i -E 's/^#?PermitRootLogin[[:space:]].*/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    rm -f /etc/ssh/ssh_host_*

# ===============================
# start.sh (from buildx bake context "scripts")
# ===============================
COPY --from=scripts start.sh /start.sh
RUN chmod 755 /start.sh

# ===============================
# nginx / branding (from bake contexts "proxy" and "logo")
# ===============================
COPY --from=proxy nginx.conf /etc/nginx/nginx.conf
COPY --from=proxy readme.html /usr/share/nginx/html/readme.html

# Print the banner whenever root opens an interactive bash session.
COPY --from=logo yotta.txt /etc/yotta.txt
RUN echo 'cat /etc/yotta.txt' >> /root/.bashrc

# ===============================
# Ports
# ===============================
EXPOSE 22 80 8888

USER root
WORKDIR /root
# Login shell so profile scripts run; exec hands the process over to start.sh.
CMD ["/bin/bash", "-lc", "exec /start.sh"]

0 commit comments

Comments
 (0)