Skip to content

Commit 90f7182

Browse files
Create GCU Container
1 parent 9c98c48 commit 90f7182

File tree

21 files changed

+948
-0
lines changed

21 files changed

+948
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
FROM docker-config-engine-bookworm-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}} AS builder

# Build stage: compiles the GCU watchdog Rust binary. Nothing installed here
# reaches the runtime image except the artifact copied out with COPY --from.

# Install toolchain prerequisites for building Rust + protobuf code generation.
# protobuf-compiler is required by tonic-build to compile watchdog.proto.
RUN apt-get update && apt-get install -y \
        build-essential \
        protobuf-compiler \
    && rm -rf /var/lib/apt/lists/*

# tonic-build uses the PROTOC env var to locate the protobuf compiler.
# On Bookworm this must be set explicitly; without it 'cargo build' fails
# with "could not find `protoc` installation".
ENV PROTOC=/usr/bin/protoc

# Install Rust/Cargo via rustup (pinned toolchain for reproducible builds).
# The installer is piped from curl into sh, so the inner shell runs with
# 'pipefail': without it a failed download would leave sh reading empty input
# and exiting 0, and the error would only surface later at 'cargo build'.
ARG RUST_ROOT=/usr/.cargo
RUN RUSTUP_HOME=$RUST_ROOT CARGO_HOME=$RUST_ROOT bash -o pipefail -c \
    'curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.79.0 -y'

# Persist the same locations the installer was given so later cargo/rustup
# invocations in this stage resolve the toolchain and cargo home consistently.
ENV RUSTUP_HOME=$RUST_ROOT
ENV CARGO_HOME=$RUST_ROOT
ENV PATH=$PATH:$RUST_ROOT/bin

# Copy the GCU watchdog Rust source into the build stage.
# The 'watchdog/' directory inside the docker build context is populated from
# src/gcu-watchdog/ by the SONiC build system before 'docker build' is invoked.
WORKDIR /watchdog
COPY watchdog/ ./

# Build the release binary. tonic-build will invoke protoc to compile
# proto/watchdog.proto into Rust gRPC server stubs during this step.
RUN cargo build --release
31+
32+
# ---------------------------------------------------------------------------
33+
# Final (runtime) stage
34+
# ---------------------------------------------------------------------------
35+
FROM docker-config-engine-bookworm-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

# Build-time arguments: the container name is stamped into rsyslog's tag
# template, the image version is exported to the running container.
ARG docker_container_name
ARG image_version

# Prefix every syslog tag with the container name. The step fails the build
# when /etc/rsyslog.conf is absent, because 'test' returns non-zero.
RUN test -f /etc/rsyslog.conf && \
    sed -ri "s/%syslogtag%/${docker_container_name}#%syslogtag%/;" /etc/rsyslog.conf

# Runtime environment.
#   DEBIAN_FRONTEND            - keep apt-get non-interactive
#   IMAGE_VERSION              - value of the image_version build argument
#   GCU_WATCHDOG_INTERVAL_SECS - how often (seconds) the watchdog re-checks the
#                                venv checksum; override at container runtime
#                                with -e GCU_WATCHDOG_INTERVAL_SECS=<n>
ENV DEBIAN_FRONTEND=noninteractive \
    IMAGE_VERSION=${image_version} \
    GCU_WATCHDOG_INTERVAL_SECS=30

# supervisord program definitions for this container
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]

# Watchdog binary produced by the 'builder' stage
COPY --from=builder ["/watchdog/target/release/gcu_watchdog", "/usr/bin/gcu_watchdog"]
RUN chmod +x /usr/bin/gcu_watchdog

ENTRYPOINT ["/usr/local/bin/supervisord"]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"name": "gcu-watchdog",
3+
"has_per_asic_scope": false,
4+
"has_global_scope": true,
5+
"auto_restart": "enabled",
6+
"high_mem_alert": "disabled",
7+
"set_owner": "local"
8+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
[supervisord]
2+
logfile_maxbytes=1MB
3+
logfile_backups=2
4+
nodaemon=true
5+
6+
[eventlistener:dependent-startup]
7+
command=python3 -m supervisord_dependent_startup
8+
autostart=true
9+
autorestart=unexpected
10+
startretries=0
11+
exitcodes=0,3
12+
events=PROCESS_STATE
13+
buffer_size=1024
14+
15+
[program:rsyslogd]
16+
command=/usr/sbin/rsyslogd -n -iNONE
17+
priority=1
18+
autostart=false
19+
autorestart=unexpected
20+
stdout_logfile=NONE
21+
stdout_syslog=true
22+
stderr_logfile=NONE
23+
stderr_syslog=true
24+
dependent_startup=true
25+
26+
[program:gcu_watchdog]
27+
command=/usr/bin/gcu_watchdog
28+
priority=3
29+
autostart=false
30+
# Restart on unexpected exit (crash/panic) but not on clean exit code 0
31+
# (e.g., graceful SIGTERM shutdown initiated by supervisord).
32+
autorestart=unexpected
33+
exitcodes=0
34+
startsecs=0
35+
stdout_logfile=NONE
36+
stdout_syslog=true
37+
stderr_logfile=NONE
38+
stderr_syslog=true
39+
dependent_startup=true
40+
dependent_startup_wait_for=rsyslogd:running

dockers/docker-gcu/Dockerfile.j2

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
ARG BASE=docker-config-engine-bookworm-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}

FROM ${BASE}

# Build-time arguments: the container name is stamped into rsyslog's tag
# template, the image version is exported to the running container.
ARG docker_container_name
ARG image_version

# DEBIAN_FRONTEND keeps apt-get non-interactive; IMAGE_VERSION passes the
# image_version build argument through to the container.
ENV DEBIAN_FRONTEND=noninteractive \
    IMAGE_VERSION=${image_version}

# Prefix every syslog tag with the container name. The step fails the build
# when /etc/rsyslog.conf is absent, because 'test' returns non-zero.
RUN test -f /etc/rsyslog.conf && \
    sed -ri "s/%syslogtag%/${docker_container_name}#%syslogtag%/;" /etc/rsyslog.conf

# Python venv support is the only extra OS package this image needs.
# 'sudo' is deliberately not installed: SONiC containers run as root.
# The apt caches and package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -f -y python3-venv \
    && apt-get clean -y \
    && apt-get autoclean -y \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

{% if docker_sonic_gcu_whls.strip() %}
# Locally-built sonic-gcu wheel (plus its dependency wheels) -> /python-wheels/
{{ copy_files("python-wheels/", docker_sonic_gcu_whls.split(' '), "/python-wheels/") }}
{% endif %}

# Init / health-check script, installed under the name supervisord invokes
COPY ["docker-init.sh", "/usr/bin/gcu-init.sh"]
RUN chmod +x /usr/bin/gcu-init.sh

# supervisord program definitions for this container
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]

ENTRYPOINT ["/usr/local/bin/supervisord"]

dockers/docker-gcu/docker-init.sh

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#!/bin/bash
2+
# docker-init.sh — GCU venv setup and health-check script.
3+
#
4+
# Usage:
5+
# gcu-init.sh setup — create /opt/gcu-venv and install the sonic-gcu wheel
6+
# gcu-init.sh healthcheck — run lightweight smoke-tests against the installed GCU
7+
#
8+
# This script is invoked by supervisord as two separate one-shot programs so that
9+
# supervisord_dependent_startup can gate execution order:
10+
# 1. gcu-setup (priority=2): runs 'setup' — exits 0 on success
11+
# 2. gcu-healthcheck (priority=3): runs 'healthcheck' — exits 0 on success
12+
13+
# Abort on the first command that fails.
set -o errexit

# Locations shared by the setup and healthcheck subcommands.
VENV_DIR=/opt/gcu-venv                          # virtual environment root
WHEEL_DIR=/python-wheels                        # directory holding the wheels
WHEEL_GLOB="${WHEEL_DIR}/sonic_gcu-*.whl"       # pattern matching the sonic-gcu wheel
SETUP_SENTINEL="${VENV_DIR}/.setup_complete"    # marker written when setup succeeds
19+
20+
###############################################################################
21+
# setup: create venv and install wheel
22+
###############################################################################
23+
# do_setup: create the GCU virtual environment and install the sonic-gcu wheel.
#
# Reads the file-level VENV_DIR, WHEEL_DIR, WHEEL_GLOB and SETUP_SENTINEL.
# Exits the script with status 1 when no wheel matches WHEEL_GLOB; any other
# failing command aborts the script through 'set -e'. On success the sentinel
# file is (re)written so the healthcheck can tell that setup completed.
do_setup() {
    echo "[gcu-init] Starting GCU venv setup ..."

    # Remove any sentinel left behind by an earlier run: if this run fails
    # part-way, the healthcheck must not see a stale "setup complete" marker.
    rm -f "${SETUP_SENTINEL}"

    # Create venv with --system-site-packages so C-extension packages
    # (swsscommon, sonic-py-common, libyang Python bindings, etc.) that are
    # installed system-wide are visible inside the venv without reinstalling
    # them. Pure-Python deps that are NOT on the system path must still be
    # declared in the wheel's install_requires; because pip runs with
    # --no-index they are never downloaded and have to be available as wheels
    # in ${WHEEL_DIR}.
    #
    # The check is on bin/pip rather than on the directory so that a venv left
    # half-created by an interrupted run is rebuilt instead of being skipped.
    if [ ! -x "${VENV_DIR}/bin/pip" ]; then
        python3 -m venv --system-site-packages "${VENV_DIR}"
        echo "[gcu-init] Created venv at ${VENV_DIR}"
    else
        echo "[gcu-init] Venv already exists at ${VENV_DIR}, skipping creation"
    fi

    # Pick the first wheel matching the pattern (glob results are sorted).
    # When nothing matches, the pattern stays literal and the -f test fails.
    # shellcheck disable=SC2206
    local candidates=( ${WHEEL_GLOB} )
    local wheel_file="${candidates[0]}"
    if [ ! -f "${wheel_file}" ]; then
        echo "[gcu-init] ERROR: No sonic_gcu wheel found in ${WHEEL_DIR}" >&2
        exit 1
    fi

    echo "[gcu-init] Installing ${wheel_file} into ${VENV_DIR} ..."
    # --system-site-packages is an option of 'python3 -m venv', not of
    # 'pip install'; passing it to pip aborts with "no such option".
    # --find-links lets pip resolve dependency wheels shipped next to the
    # sonic-gcu wheel while --no-index keeps the install fully offline.
    "${VENV_DIR}/bin/pip" install --no-index --find-links "${WHEEL_DIR}" "${wheel_file}"

    # Write a sentinel file so the healthcheck can verify setup completed OK.
    touch "${SETUP_SENTINEL}"
    echo "[gcu-init] GCU venv setup complete."
}
54+
55+
###############################################################################
56+
# healthcheck: smoke-test venv GCU + host GCU
57+
###############################################################################
58+
# do_healthcheck: smoke-test the venv GCU and the system-wide GCU.
#
# Each check pipes an empty JSON patch ('[]') into 'config apply-patch'.
# The script exits with status 1 when the setup sentinel is missing or when
# either check fails; the checks run in order, venv first.
do_healthcheck() {
    echo "[gcu-init] Running GCU health checks ..."

    # Refuse to run unless setup left its sentinel behind; otherwise the
    # 'config apply-patch' errors below would be misleading.
    [[ -f "${SETUP_SENTINEL}" ]] || {
        echo "[gcu-init] ERROR: Setup sentinel not found at ${SETUP_SENTINEL}." >&2
        echo "[gcu-init] 'gcu-setup' may have failed. Skipping healthcheck." >&2
        exit 1
    }

    # Check 1: the 'config' entry point installed inside the venv.
    echo "[gcu-init] Checking venv GCU (${VENV_DIR}/bin/config apply-patch) ..."
    if ! echo '[]' | "${VENV_DIR}/bin/config" apply-patch /dev/stdin; then
        echo "[gcu-init] Venv GCU health check: FAIL" >&2
        exit 1
    fi
    echo "[gcu-init] Venv GCU health check: PASS"

    # Check 2: the 'config' CLI found on PATH. It is invoked directly, without
    # sudo, so no sudoers configuration is needed inside the container.
    echo "[gcu-init] Checking host GCU (config apply-patch) ..."
    if ! echo '[]' | config apply-patch /dev/stdin; then
        echo "[gcu-init] Host GCU health check: FAIL" >&2
        exit 1
    fi
    echo "[gcu-init] Host GCU health check: PASS"

    echo "[gcu-init] All GCU health checks passed."
}
93+
94+
###############################################################################
95+
# Dispatch
96+
###############################################################################
97+
# Subcommand selection: the first argument picks the action and defaults to
# 'setup' when omitted. Anything else prints usage to stderr and exits 1.
COMMAND="${1:-setup}"

if [ "${COMMAND}" = "setup" ]; then
    do_setup
elif [ "${COMMAND}" = "healthcheck" ]; then
    do_healthcheck
else
    echo "Usage: $0 {setup|healthcheck}" >&2
    exit 1
fi

dockers/docker-gcu/feature.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"name": "gcu",
3+
"has_per_asic_scope": false,
4+
"has_global_scope": true,
5+
"auto_restart": "enabled",
6+
"high_mem_alert": "disabled",
7+
"set_owner": "local"
8+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
[supervisord]
2+
logfile_maxbytes=1MB
3+
logfile_backups=2
4+
nodaemon=true
5+
6+
[eventlistener:dependent-startup]
7+
command=python3 -m supervisord_dependent_startup
8+
autostart=true
9+
autorestart=unexpected
10+
startretries=0
11+
exitcodes=0,3
12+
events=PROCESS_STATE
13+
buffer_size=1024
14+
15+
[program:rsyslogd]
16+
command=/usr/sbin/rsyslogd -n -iNONE
17+
priority=1
18+
autostart=false
19+
autorestart=unexpected
20+
stdout_logfile=NONE
21+
stdout_syslog=true
22+
stderr_logfile=NONE
23+
stderr_syslog=true
24+
dependent_startup=true
25+
26+
# One-shot program: create the GCU venv and install the wheel.
27+
# priority=2 ensures rsyslogd is up first.
28+
# startsecs=0 means supervisord does not wait for it to stay running.
29+
# autorestart=false because it exits intentionally on success.
30+
[program:gcu-setup]
31+
command=/usr/bin/gcu-init.sh setup
32+
priority=2
33+
autostart=false
34+
autorestart=false
35+
startsecs=0
36+
stdout_logfile=/dev/stdout
37+
stdout_logfile_maxbytes=0
38+
stderr_logfile=/dev/stderr
39+
stderr_logfile_maxbytes=0
40+
dependent_startup=true
41+
dependent_startup_wait_for=rsyslogd:running
42+
43+
# Health-check program: verifies venv GCU and host GCU respond correctly.
44+
# Waits for gcu-setup to have exited (supervisord_dependent_startup
45+
# 'exited' state). The docker-init.sh healthcheck subcommand additionally
46+
# checks for the .setup_complete sentinel file so it fails fast (exit 1) if
47+
# gcu-setup failed before the file was written.
48+
[program:gcu-healthcheck]
49+
command=/usr/bin/gcu-init.sh healthcheck
50+
priority=3
51+
autostart=false
52+
autorestart=false
53+
startsecs=0
54+
stdout_logfile=/dev/stdout
55+
stdout_logfile_maxbytes=0
56+
stderr_logfile=/dev/stderr
57+
stderr_logfile_maxbytes=0
58+
dependent_startup=true
59+
dependent_startup_wait_for=gcu-setup:exited

files/build_templates/sonic_debian_extension.j2

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,6 +1082,8 @@ sudo LANG=C cp $SCRIPTS_DIR/database.sh $FILESYSTEM_ROOT/usr/local/bin/database.
10821082
sudo LANG=C cp $SCRIPTS_DIR/snmp.sh $FILESYSTEM_ROOT/usr/local/bin/snmp.sh
10831083
sudo LANG=C cp $SCRIPTS_DIR/telemetry.sh $FILESYSTEM_ROOT/usr/local/bin/telemetry.sh
10841084
sudo LANG=C cp $SCRIPTS_DIR/gnmi.sh $FILESYSTEM_ROOT/usr/local/bin/gnmi.sh
1085+
sudo LANG=C cp $SCRIPTS_DIR/gcu.sh $FILESYSTEM_ROOT/usr/local/bin/gcu.sh
1086+
sudo LANG=C cp $SCRIPTS_DIR/gcu-watchdog.sh $FILESYSTEM_ROOT/usr/local/bin/gcu-watchdog.sh
10851087
sudo LANG=C cp $SCRIPTS_DIR/otel.sh $FILESYSTEM_ROOT/usr/local/bin/otel.sh
10861088
sudo LANG=C cp $SCRIPTS_DIR/bmp.sh $FILESYSTEM_ROOT/usr/local/bin/bmp.sh
10871089
sudo LANG=C cp $SCRIPTS_DIR/mgmt-framework.sh $FILESYSTEM_ROOT/usr/local/bin/mgmt-framework.sh

files/scripts/gcu-watchdog.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/bash
2+
3+
# debug MESSAGE: hand MESSAGE to /usr/bin/logger.
debug() {
    local message="$1"
    /usr/bin/logger "${message}"
}
7+
8+
# start: run '/usr/bin/${SERVICE}.sh start', logging before and after.
# $DEV is left unquoted on purpose so an empty value adds no argument.
start() {
    local unit="${SERVICE}$DEV"

    debug "Starting ${unit} service..."
    /usr/bin/${SERVICE}.sh start $DEV
    debug "Started ${unit} service..."
}
15+
16+
# wait: delegate to '/usr/bin/${SERVICE}.sh wait'.
# Note: defining this function shadows the shell's builtin 'wait' within
# this script.
wait() {
    /usr/bin/${SERVICE}.sh wait $DEV
}
19+
20+
# stop: run '/usr/bin/${SERVICE}.sh stop', logging before and after.
# $DEV is left unquoted on purpose so an empty value adds no argument.
stop() {
    local unit="${SERVICE}$DEV"

    debug "Stopping ${unit} service..."
    /usr/bin/${SERVICE}.sh stop $DEV
    debug "Stopped ${unit} service..."
}
26+
27+
# Globals read by start/wait/stop: the optional second argument (appended to
# the service name in log messages and forwarded to the service script) and
# the name of the service this wrapper controls.
DEV=$2
SERVICE="gcu-watchdog"

# Dispatch on the first argument; anything unrecognised prints usage and
# exits with status 1.
case "$1" in
    start)
        start
        ;;
    wait)
        wait
        ;;
    stop)
        stop
        ;;
    *)
        echo "Usage: $0 {start|wait|stop}"
        exit 1
        ;;
esac

0 commit comments

Comments
 (0)