Skip to content

Commit adaf0ae

Browse files
authored
Merge branch 'main' into update-pytorch-tag
2 parents 9098e61 + fd31652 commit adaf0ae

27 files changed

Lines changed: 640 additions & 33 deletions

File tree

.github/workflows/build.yml

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
name: Build and Push
2+
3+
# ──────────────────────────────────────────────────────────────────────────────
4+
# CI — triggered on every PR that touches official-templates/**: builds images and pushes to GHCR for validation.
5+
# CD — triggered when a PR is merged: promotes the GHCR image to Docker Hub
6+
# via skopeo copy (no rebuild).
7+
# ──────────────────────────────────────────────────────────────────────────────
8+
9+
on:
  # CI trigger: pull requests that modify anything under official-templates/.
  pull_request:
    paths:
      - "official-templates/**"

  # CD trigger: fires when such a pull request is closed.
  pull_request_target:
    types:
      - closed
    paths:
      - "official-templates/**"

  # Manual entry point that can drive either the CI or the CD path.
  workflow_dispatch:
    inputs:
      template:
        description: "Template name to build (e.g. pytorch). Leave empty to build all changed."
        required: false
        type: string
      mode:
        description: "ci = build to GHCR only, cd = promote GHCR → Docker Hub"
        required: false
        default: ci
        type: choice
        options:
          - ci
          - cd
      sha:
        description: "Full or short SHA of the GHCR image to promote (cd mode only). Defaults to HEAD."
        required: false
        type: string
38+
39+
# ──────────────────────────────────────────────────────────────────────────────
40+
# CI jobs
41+
# ──────────────────────────────────────────────────────────────────────────────
42+
jobs:
43+
ci-detect:
  name: CI — Detect changed templates
  # Runs for pull_request events and for manual dispatches in "ci" mode.
  # Emits `matrix`: a JSON array of template directory names to build.
  if: >
    github.event_name == 'pull_request' ||
    (github.event_name == 'workflow_dispatch' && inputs.mode == 'ci')
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Full history: the git diff below compares two explicit commits.
        fetch-depth: 0

    - name: Determine templates to build
      id: set-matrix
      env:
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # User-controlled input is passed through the environment and never
        # interpolated into the script text, so it cannot inject shell code.
        INPUT_TEMPLATE: ${{ inputs.template }}
      run: |
        if [ -n "${INPUT_TEMPLATE}" ]; then
          # jq handles JSON escaping of the template name.
          TEMPLATES=$(jq -c -n --arg t "${INPUT_TEMPLATE}" '[$t]')
        elif [ -z "${BASE_SHA}" ] || [ -z "${HEAD_SHA}" ]; then
          # Manual dispatch without a template has no PR range to diff.
          echo "::warning::No template input and no pull request context; nothing to build."
          TEMPLATES='[]'
        else
          # Run git diff on its own so a failure stops the step instead of
          # being masked by the rest of the pipeline.
          CHANGED=$(git diff --name-only "${BASE_SHA}" "${HEAD_SHA}")
          # Keep only template directories that ship a docker-bake.hcl.
          TEMPLATES=$(printf '%s\n' "${CHANGED}" \
            | grep '^official-templates/' \
            | awk -F'/' '{print $2}' \
            | sort -u \
            | grep -v '^\s*$' \
            | while read -r dir; do
                [ -f "official-templates/$dir/docker-bake.hcl" ] && echo "$dir"
              done \
            | jq -R -s -c 'split("\n") | map(select(length > 0))')
        fi
        echo "matrix=${TEMPLATES}" >> "$GITHUB_OUTPUT"
        echo "Templates to build: ${TEMPLATES}"
77+
78+
ci-build:
  name: CI — Build ${{ matrix.template }}
  # One matrix leg per template reported by ci-detect; skipped when the list is empty.
  needs: ci-detect
  if: needs.ci-detect.outputs.matrix != '[]' && needs.ci-detect.outputs.matrix != ''
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: false
    matrix:
      template: ${{ fromJson(needs.ci-detect.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v4

    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to GHCR
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Log in to Docker Hub (for registry cache read)
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Build and push to GHCR
      working-directory: official-templates/${{ matrix.template }}
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        TEMPLATE: ${{ matrix.template }}
        # Empty on workflow_dispatch; the script then falls back to GITHUB_SHA.
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        BUILDX_BAKE_ENTITLEMENTS_FS: "0"
      run: |
        # On pull_request events GITHUB_SHA is the temporary merge commit, not
        # the PR head. Tag with the PR head SHA so the image can later be
        # looked up by github.event.pull_request.head.sha.
        BUILD_SHA="${PR_HEAD_SHA:-${GITHUB_SHA}}"
        SHA_SHORT="${BUILD_SHA::7}"
        CACHE_REF="yottalabsai/buildcache:${TEMPLATE}"

        # Override each bake target's tags to GHCR.
        # Tag scheme: ghcr.io/yottalabsai/<template>:<target>-sha-<sha>
        # Using per-target tags handles multi-target bake files (e.g. base has 8 targets).
        mapfile -t TAG_OVERRIDES < <(docker buildx bake --print 2>/dev/null \
          | jq -r --arg tmpl "${TEMPLATE}" --arg sha "${SHA_SHORT}" \
              '.target | keys[] | "\(.).tags=ghcr.io/yottalabsai/\($tmpl):\(.)-sha-\($sha)"')

        # Without overrides the build would push to the tags declared in the
        # bake file itself, so refuse to continue.
        if [ "${#TAG_OVERRIDES[@]}" -eq 0 ]; then
          echo "::error::No bake targets found for ${TEMPLATE}; refusing to push with default tags."
          exit 1
        fi

        # Build the argument list as an array instead of eval-ing a string, so
        # special characters in HF_TOKEN cannot break or alter the command.
        BAKE_ARGS=()
        for override in "${TAG_OVERRIDES[@]}"; do
          BAKE_ARGS+=(--set "${override}")
        done

        docker buildx bake "${BAKE_ARGS[@]}" \
          --set "*.cache-from=type=registry,ref=${CACHE_REF}" \
          --set "*.cache-to=type=registry,ref=${CACHE_REF},mode=max" \
          --set "*.args.HF_TOKEN=${HF_TOKEN:-}" \
          --push
135+
136+
# ──────────────────────────────────────────────────────────────────────────────
137+
# CD jobs
138+
# ──────────────────────────────────────────────────────────────────────────────
139+
cd-detect:
  name: CD — Detect merged templates
  # Runs when a pull request is closed as merged, or on a manual "cd" dispatch.
  # Emits `matrix`: a JSON array of template directory names to promote.
  if: >
    (github.event_name == 'pull_request_target' && github.event.pull_request.merged == true) ||
    (github.event_name == 'workflow_dispatch' && inputs.mode == 'cd')
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Full history: the git diff below compares two explicit commits.
        fetch-depth: 0

    - name: Determine templates to promote
      id: set-matrix
      env:
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # User-controlled input is passed through the environment and never
        # interpolated into the script text, so it cannot inject shell code.
        INPUT_TEMPLATE: ${{ inputs.template }}
      run: |
        if [ -n "${INPUT_TEMPLATE}" ]; then
          # jq handles JSON escaping of the template name.
          TEMPLATES=$(jq -c -n --arg t "${INPUT_TEMPLATE}" '[$t]')
        elif [ -z "${BASE_SHA}" ] || [ -z "${HEAD_SHA}" ]; then
          # Manual dispatch without a template has no PR range to diff.
          echo "::warning::No template input and no pull request context; nothing to promote."
          TEMPLATES='[]'
        else
          # After a squash or rebase merge the PR head commit is not reachable
          # from any branch, so fetch whichever commit is missing by SHA.
          for sha in "${BASE_SHA}" "${HEAD_SHA}"; do
            if ! git cat-file -e "${sha}^{commit}" 2>/dev/null; then
              git fetch --no-tags origin "${sha}"
            fi
          done

          # Run git diff on its own so a failure stops the step instead of
          # being masked by the rest of the pipeline.
          CHANGED=$(git diff --name-only "${BASE_SHA}" "${HEAD_SHA}")
          # Keep only template directories that ship a docker-bake.hcl.
          TEMPLATES=$(printf '%s\n' "${CHANGED}" \
            | grep '^official-templates/' \
            | awk -F'/' '{print $2}' \
            | sort -u \
            | grep -v '^\s*$' \
            | while read -r dir; do
                [ -f "official-templates/$dir/docker-bake.hcl" ] && echo "$dir"
              done \
            | jq -R -s -c 'split("\n") | map(select(length > 0))')
        fi
        echo "matrix=${TEMPLATES}" >> "$GITHUB_OUTPUT"
        echo "Templates to promote: ${TEMPLATES}"
173+
174+
cd-promote:
  name: CD — Promote ${{ matrix.template }} → Docker Hub
  # One matrix leg per template reported by cd-detect; skipped when the list is empty.
  # Copies already-built GHCR images to the Docker Hub tags from the bake file.
  needs: cd-detect
  if: needs.cd-detect.outputs.matrix != '[]' && needs.cd-detect.outputs.matrix != ''
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: read
  strategy:
    fail-fast: false
    matrix:
      template: ${{ fromJson(needs.cd-detect.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Full history so the commit being promoted can be resolved below;
        # the default depth-1 clone only contains the checked-out commit.
        fetch-depth: 0

    - name: Promote GHCR → Docker Hub
      env:
        TEMPLATE: ${{ matrix.template }}
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # User-controlled input is passed through the environment and never
        # interpolated into the script text, so it cannot inject shell code.
        INPUT_SHA: ${{ inputs.sha }}
        DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
        DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        # Priority: manual sha input → PR head → current HEAD
        RESOLVE_SHA="${INPUT_SHA:-${PR_HEAD_SHA:-${GITHUB_SHA}}}"
        SHA_SHORT="${RESOLVE_SHA::7}"

        # A PR head commit is not reachable from any branch after a squash or
        # rebase merge, so fetch it by SHA when it is not already present.
        if ! git cat-file -e "${RESOLVE_SHA}^{commit}" 2>/dev/null; then
          git fetch --no-tags origin "${RESOLVE_SHA}"
        fi

        # Use the bake file at the exact commit so target names and
        # Docker Hub tags match what CI built.
        git checkout "${RESOLVE_SHA}" -- "official-templates/${TEMPLATE}/docker-bake.hcl"

        # For each bake target, copy its GHCR image to the Docker Hub tag
        # defined in the bake file. skopeo is pre-installed on ubuntu-latest.
        cd "official-templates/${TEMPLATE}"
        docker buildx bake --print 2>/dev/null \
          | jq -r '.target | to_entries[] | "\(.key) \(.value.tags[])"' \
          | while IFS=' ' read -r target dh_tag; do
              src="docker://ghcr.io/yottalabsai/${TEMPLATE}:${target}-sha-${SHA_SHORT}"
              dst="docker://${dh_tag}"
              echo "Promoting ${src} → ${dst}"
              skopeo copy \
                --src-creds "x-access-token:${GITHUB_TOKEN}" \
                --dest-creds "${DOCKERHUB_USERNAME}:${DOCKERHUB_TOKEN}" \
                "${src}" "${dst}"
            done

.github/workflows/lint.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: Lint
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches: [main]
7+
8+
jobs:
9+
hadolint:
  name: Dockerfile lint
  # Lints every Dockerfile in the repository with a pinned hadolint release.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Install hadolint
      run: |
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page as the hadolint binary.
        curl --fail -sSLo /usr/local/bin/hadolint \
          https://github.com/hadolint/hadolint/releases/download/v2.12.0/hadolint-Linux-x86_64
        chmod +x /usr/local/bin/hadolint

    - name: Run Hadolint
      run: |
        # Exclude files that use bash heredocs (cat <<'EOF') inside RUN instructions,
        # which confuse hadolint v2.12 parser — tracked for fix in a follow-up PR.
        #
        # Rules suppressed (GPU containers intentionally violate these best-practices):
        #   DL3006  FROM without explicit tag (base image via ARG)
        #   DL3008  pin apt-get versions (impractical for GPU base images)
        #   DL3013  pin pip versions (managed per-template via requirements)
        #   DL3059  multiple consecutive RUN (intentional for layer caching)
        #   SC3010  [[ ]] in POSIX sh (false positive; SHELL is /bin/bash)
        #   DL3022  COPY --from external bake context (proxy, scripts, logo)
        #   DL3002  last USER is root (intentional for GPU/system containers)
        #   DL3003  cd in RUN (used inside complex shell scripts)
        #   DL3018  pin apk versions (base image version managed externally)
        #   DL3042  pip cache dir (using --no-cache-dir where needed)
        #
        # xargs -r skips the hadolint call when find matches no files, rather
        # than running hadolint with no arguments.
        find . -name Dockerfile \
          ! -path './official-templates/comfyui/Dockerfile' \
          ! -path './official-templates/dflash/Dockerfile' \
          ! -path './official-templates/flux1dev-comfyui/Dockerfile' \
          ! -path './official-templates/skyrl/Dockerfile' \
          ! -path './official-templates/wan22-comfyui/Dockerfile' \
          -print0 | sort -z | xargs -0 -r hadolint \
            --failure-threshold error \
            --ignore DL3006 \
            --ignore DL3008 \
            --ignore DL3013 \
            --ignore DL3059 \
            --ignore SC3010 \
            --ignore DL3022 \
            --ignore DL3002 \
            --ignore DL3003 \
            --ignore DL3018 \
            --ignore DL3042

.hadolint.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Repository-wide hadolint configuration.
# NOTE(review): the lint workflow passes --failure-threshold error on the
# command line, so CI is more lenient than this file — confirm which is intended.
failure-threshold: warning

ignore:
  - DL3006  # FROM without explicit tag — base image supplied via build ARG
  - DL3008  # apt-get: pin package versions — impractical for GPU base images
  - DL3013  # pip: pin package versions — managed per-template via requirements
  - DL3059  # multiple consecutive RUN — intentional for layer caching control
  - SC3010  # [[ ]] in POSIX sh — false positive; SHELL is set to /bin/bash
  - DL3022  # COPY --from references external bake context (proxy, scripts, logo, base)
  - DL3002  # Last USER is root — intentional for GPU/system containers
  - DL3003  # Use WORKDIR instead of cd — cd used inside complex RUN scripts
  - DL3018  # Pin versions in apk add — base image version managed externally
  - DL3042  # pip cache dir — --no-cache-dir used where needed; matches the CI ignore list

README.md

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,6 @@ All official images must include the following to integrate with the Yotta platf
3838
- `openssh-server` — SSH access to the container
3939
- `jupyterlab` — JupyterLab notebook access
4040

41-
### `yotta.yaml`
42-
43-
Each container folder must include a `yotta.yaml` describing its version and exposed services:
44-
45-
```yaml
46-
version: '1.0.0'
47-
services:
48-
- name: 'my-service'
49-
port: 9000
50-
proxy_port: 9001
51-
```
52-
5341
### `README.md`
5442

5543
Each container folder must include a `README.md`. This file is displayed on Docker Hub and in the Yotta platform UI. It is also served to users when a proxied port is not yet ready.

helper-templates/verify-nccl/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,17 @@ ENV NCCL_DEBUG=TRACE
55
WORKDIR /verify-nccl
66

77
# Install additional packages
8+
# hadolint ignore=DL3008
89
RUN apt-get update && apt-get install -y --no-install-recommends \
910
nano \
1011
pciutils \
1112
&& rm -rf /var/lib/apt/lists/*
1213

1314
RUN git clone https://github.com/NVIDIA/cuda-samples.git && \
14-
cd cuda-samples/Samples/0_Introduction/simpleP2P && \
15-
make
15+
make -C cuda-samples/Samples/0_Introduction/simpleP2P
1616

1717
COPY check_nccl.sh .
1818
RUN chmod +x check_nccl.sh
1919

2020
# Start Container
21-
CMD tail -f /dev/null
21+
CMD ["tail", "-f", "/dev/null"]

0 commit comments

Comments
 (0)