Skip to content

Commit 3265db9

Browse files
authored
fix(build): correct docker build process for mobile sandbox (#353)
1 parent ed121f5 commit 3265db9

File tree

6 files changed

+401
-282
lines changed

6 files changed

+401
-282
lines changed

.github/workflows/build_sandbox_image.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ jobs:
9696
export PIP_DEFAULT_TIMEOUT=300
9797
pip install -q -e ".[dev,ext]"
9898
99+
- name: Attempt to load Android kernel modules
100+
if: contains(github.event.inputs.build_types, 'mobile')
101+
run: |
102+
echo "1/4: Installing linux-modules-extra for kernel $(uname -r)..."
103+
sudo apt-get update -y && sudo apt-get install -y linux-modules-extra-$(uname -r) || echo "WARNING: apt-get install failed, but continuing."
104+
echo "2/4: Attempting to load binder_linux module..."
105+
sudo modprobe binder_linux devices="binder,hwbinder,vndbinder" || echo "INFO: modprobe binder_linux failed."
106+
echo "3/4: Attempting to load ashmem_linux module..."
107+
sudo modprobe ashmem_linux || echo "INFO: modprobe ashmem_linux failed as expected."
108+
echo "4/4: Checking loaded modules with lsmod..."
109+
lsmod | grep -E "binder|ashmem" || echo "INFO: No 'binder' or 'ashmem' modules found in lsmod output."
110+
99111
- name: Run build script for all types
100112
env:
101113
AUTO_BUILD: "true"

src/agentscope_runtime/sandbox/box/mobile/Dockerfile

Lines changed: 7 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -46,90 +46,6 @@ RUN mkdir -p /prod_bundle && \
4646
cd /prod_bundle && \
4747
npm install --production
4848

49-
# =================================================================
50-
# Stage 3: Fetch Redroid Docker image (redroid-fetcher)
51-
# =================================================================
52-
FROM docker:28-dind AS redroid-fetcher
53-
54-
# NOTE:
55-
# This stage secures the build process by pulling a third-party image (Redroid)
56-
# using an immutable digest (SHA-256 hash) instead of a mutable tag. This prevents
57-
# supply chain attacks where a tag could be retargeted to a malicious image.
58-
#
59-
# To achieve this, it starts a Docker daemon inside the build environment
60-
# (Docker-in-Docker), pulls the specified Redroid image, and saves it as a tarball.
61-
#
62-
# --- DOCKER-IN-DOCKER PRIVILEGE WARNING ---
63-
# This approach requires the build environment to support running a Docker daemon with
64-
# sufficient privileges (e.g., privileged containers with proper cgroup access). In
65-
# many CI/CD or restricted environments, such privileges may not be available, which
66-
# can cause this stage to fail due to permission or daemon startup issues.
67-
#
68-
# If you encounter build failures at this stage, a recommended and more secure
69-
# alternative is to perform the pull and save manually on a trusted host machine:
70-
#
71-
# 1. On a host with Docker access, manually pull the image using its immutable
72-
# digest. Choose the digest corresponding to your target architecture:
73-
#
74-
# # For linux/amd64 (most common for servers and PCs):
75-
# docker pull redroid/redroid@sha256:d1ca0815eb68139a43d25a835e374559e9d18f5d5cea1a4288d4657c0074fb8d
76-
#
77-
# # For linux/arm64 (Apple M-series, Raspberry Pi, AWS Graviton, etc.):
78-
# docker pull redroid/redroid@sha256:f070231146ba5043bdb225a1f51c77ef0765c1157131b26cb827078bf536c922
79-
#
80-
# 2. Then, save the pulled image to a tarball. Use the same digest as in step 1.
81-
# (Example for amd64):
82-
# docker save -o redroid.tar redroid/redroid@sha256:d1ca0815eb68139a43d25a835e374559e9d18f5d5cea1a4288d4657c0074fb8d
83-
#
84-
# 3. Place the resulting `redroid.tar` in the Docker build context (e.g., next to
85-
# this Dockerfile, in a path like `src/agentscope_runtime/sandbox/box/mobile/`).
86-
#
87-
# 4. Remove or skip this `redroid-fetcher` stage entirely, and in the final stage,
88-
# replace the line:
89-
# COPY --from=redroid-fetcher /redroid.tar /redroid.tar
90-
# with a direct copy from your build context:
91-
# COPY src/agentscope_runtime/sandbox/box/mobile/redroid.tar /redroid.tar
92-
#
93-
# This avoids running Docker-in-Docker and is more compatible with restricted build
94-
# environments, while still maintaining supply chain security.
95-
96-
# Pin the redroid image to an immutable digest for security and reproducibility.
97-
# The default digest is for the linux/amd64 architecture.
98-
# To build for linux/arm64, pass the --build-arg flag to the docker build command:
99-
# --build-arg REDROID_DIGEST=sha256:f070231146ba5043bdb225a1f51c77ef0765c1157131b26cb827078bf536c922
100-
ARG REDROID_DIGEST=sha256:d1ca0815eb68139a43d25a835e374559e9d18f5d5cea1a4288d4657c0074fb8d
101-
102-
# --- Display a warning to the user before the privileged operation ---
103-
RUN echo "" && \
104-
echo "========================================================================" && \
105-
echo " >>> WARNING: Privileged Operation Ahead <<<" && \
106-
echo "========================================================================" && \
107-
echo "The following step will start a Docker-in-Docker (DinD) daemon." && \
108-
echo "This operation requires high privileges (e.g., the --privileged flag)" && \
109-
echo "and may fail in restricted environments like CI/CD pipelines." && \
110-
echo "" && \
111-
echo " --- IF THIS STEP FAILS, USE THE ALTERNATIVE BELOW ---" && \
112-
echo "Manually 'docker pull' and 'docker save' the image to a .tar file, then" && \
113-
echo "copy it into the build context. For detailed instructions, please refer" && \
114-
echo "to the comments at the top of this stage in the Dockerfile:" && \
115-
echo " src/agentscope_runtime/sandbox/box/mobile/Dockerfile" && \
116-
echo "========================================================================" && \
117-
echo ""
118-
119-
# --- Run the Docker-in-Docker process ---
120-
RUN dockerd-entrypoint.sh & \
121-
TIMEOUT=30; \
122-
while ! docker info > /dev/null 2>&1; do \
123-
if [ $TIMEOUT -le 0 ]; then \
124-
echo "Docker daemon did not become ready in time." >&2; \
125-
exit 1; \
126-
fi; \
127-
sleep 1; \
128-
TIMEOUT=$((TIMEOUT - 1)); \
129-
done && \
130-
docker pull redroid/redroid@${REDROID_DIGEST} && \
131-
docker save -o /redroid.tar redroid/redroid@${REDROID_DIGEST}
132-
13349
# =================================================================
13450
# Final Stage: Production Image
13551
# =================================================================
@@ -167,8 +83,13 @@ COPY src/agentscope_runtime/sandbox/box/mobile/box/mcp_server_configs.json /app/
16783
COPY src/agentscope_runtime/sandbox/box/mobile/box/scripts/start.sh /start.sh
16884
RUN chmod +x /start.sh
16985

170-
# 6. Copy the offline redroid image from the fetcher stage
171-
COPY --from=redroid-fetcher /redroid.tar /redroid.tar
86+
RUN echo "[BUILD NOTE] This step requires:" && \
87+
echo " 'src/agentscope_runtime/sandbox/box/mobile/redroid.tar'." && \
88+
echo "This file is generated by the build script ('build.py')." && \
89+
echo "If building manually, please prepare the necessary files."
90+
# 6. Copy the offline redroid image prepared by the build script
91+
# NOTE: The build.py script is responsible for creating this file in the build context.
92+
COPY src/agentscope_runtime/sandbox/box/mobile/redroid.tar /redroid.tar
17293

17394
# 7. Set entrypoint
17495
ENTRYPOINT ["/start.sh"]
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# -*- coding: utf-8 -*-
2+
import platform
3+
import subprocess
4+
import logging
5+
6+
7+
class HostPrerequisiteError(Exception):
    """Raised when the host does not satisfy the MobileSandbox
    prerequisites (e.g. the ``binder_linux`` kernel module is not loaded)."""


logger = logging.getLogger(__name__)

# Name that must appear in the ``lsmod`` output for the check to pass.
_REQUIRED_KERNEL_MODULE = "binder_linux"

# Upper bound (seconds) for the module-listing command, so that a stuck
# ``wsl`` invocation cannot block the caller forever.
_LSMOD_TIMEOUT_SECONDS = 30

# Command used to list loaded kernel modules, keyed by ``platform.system()``.
# Operating systems that are not listed here are not probed at all.
_LSMOD_COMMANDS = {
    "Linux": ["lsmod"],
    "Windows": ["wsl", "lsmod"],
}

# Remediation text raised with HostPrerequisiteError, keyed like
# ``_LSMOD_COMMANDS``.
_REMEDIATION_MESSAGES = {
    "Linux": (
        "\n========== HOST PREREQUISITE FAILED ==========\n"
        "MobileSandbox requires specific kernel modules"
        " that appear to be missing or not loaded.\n\n"
        "To fix this, please run the following commands"
        " on your Linux host:\n\n"
        "## Install required kernel modules\n"
        "sudo apt update"
        " && sudo apt install -y linux-modules-extra-`uname -r`\n"
        "sudo modprobe binder_linux"
        ' devices="binder,hwbinder,vndbinder"\n'
        "## (Optional) Load the ashmem driver for older kernels\n"
        "sudo modprobe ashmem_linux\n"
        "=================================================="
    ),
    "Windows": (
        "\n========== HOST PREREQUISITE FAILED ==========\n"
        "MobileSandbox on Windows requires Docker Desktop "
        "with the WSL 2 backend.\n"
        "The required kernel modules seem to be missing "
        "in your WSL 2 environment.\n\n"
        "To fix this, please follow these steps:\n\n"
        "1. **Ensure Docker Desktop is using WSL 2**:\n"
        " - Open Docker Desktop -> Settings -> General.\n"
        " - Make sure 'Use the WSL 2 based engine' "
        "is checked.\n\n"
        "2. **Ensure WSL is installed and updated**:\n"
        " - Open PowerShell or Command Prompt "
        "as Administrator.\n"
        " - Run: wsl --install\n"
        " - Run: wsl --update\n"
        " (An update usually installs a recent Linux kernel "
        "with the required modules.)\n\n"
        "3. **Verify manually (Optional)**:\n"
        " - After updating, run 'wsl lsmod | findstr binder' "
        "in your terminal.\n"
        " - If it shows 'binder_linux', "
        "the issue should be resolved.\n"
        "=================================================="
    ),
}


def _read_loaded_kernel_modules(command):
    """Run ``command`` and return its standard output as text.

    Args:
        command: Argument list to execute, e.g. ``["lsmod"]``.

    Returns:
        The captured stdout, or an empty string when the command cannot be
        started, exits with a non-zero status, or exceeds the timeout. In
        those cases a warning is logged instead of raising.
    """
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            encoding="utf-8",
            # Undecodable bytes must not abort the probe with a
            # UnicodeDecodeError; only the module name is searched for.
            errors="replace",
            timeout=_LSMOD_TIMEOUT_SECONDS,
        )
    except (
        # OSError covers a missing executable as well as permission errors.
        OSError,
        subprocess.CalledProcessError,
        subprocess.TimeoutExpired,
    ):
        logger.warning(
            "Could not execute '%s' to verify kernel modules.",
            " ".join(command),
        )
        return ""
    return completed.stdout


def check_mobile_sandbox_host_readiness() -> None:
    """
    Performs a check of the host environment to ensure it has the necessary
    modules (like binder_linux) to run the MobileSandbox.

    On ARM64/aarch64 machines a compatibility warning is logged. On Linux
    the output of ``lsmod`` is inspected, on Windows the output of
    ``wsl lsmod``; any other operating system is not probed.

    Raises:
        HostPrerequisiteError: If ``binder_linux`` is not present in the
            module listing (including the case where the listing could not
            be obtained). The message carries OS-specific fix instructions.
    """
    logger.info(
        "Performing host environment check for MobileSandbox readiness...",
    )

    architecture = platform.machine().lower()
    if architecture in ("aarch64", "arm64"):
        logger.warning(
            "\n======================== WARNING ========================\n"
            "ARM64/aarch64 architecture detected (e.g., Apple M-series).\n"
            "Running this mobile sandbox on a non-x86_64 host may lead \n"
            " to unexpected compatibility or performance issues.\n"
            "=========================================================",
        )

    os_type = platform.system()
    lsmod_command = _LSMOD_COMMANDS.get(os_type)
    if lsmod_command is not None:
        loaded_modules = _read_loaded_kernel_modules(lsmod_command)
        if _REQUIRED_KERNEL_MODULE not in loaded_modules:
            raise HostPrerequisiteError(_REMEDIATION_MESSAGES[os_type])

    logger.info("Host environment check passed.")

src/agentscope_runtime/sandbox/box/mobile/box/scripts/start.sh

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,25 @@
11
#!/bin/sh
22
set -e
33

4+
RUN_MODE="NORMAL"
5+
LOCK_FILE="/var/run/agentscope_first_run.lock"
6+
7+
if [ "$BUILT_BY_SCRIPT" = "true" ]; then
8+
echo "--> 'BUILT_BY_SCRIPT' flag detected. Activating advanced run-mode detection."
9+
if [ ! -f "$LOCK_FILE" ]; then
10+
RUN_MODE="HEALTH_CHECK"
11+
echo "--> First run under build script detected (Health Check Mode). Creating lock file..."
12+
mkdir -p "$(dirname "$LOCK_FILE")"
13+
touch "$LOCK_FILE"
14+
else
15+
RUN_MODE="NORMAL"
16+
echo "--> Subsequent run under build script detected (Normal Mode)."
17+
fi
18+
else
19+
echo "--> 'BUILT_BY_SCRIPT' flag not found. Assuming standard Normal Mode."
20+
RUN_MODE="NORMAL"
21+
fi
22+
423
echo "--- Phase 1: Starting internal Docker Daemon ---"
524
dockerd-entrypoint.sh &
625
dockerd_pid=$!
@@ -11,14 +30,19 @@ done
1130
echo "--> Internal Docker Daemon is UP!"
1231

1332
echo "--- Phase 2: Loading and starting nested Redroid container ---"
14-
REDROID_IMAGE="redroid/redroid:11.0.0-240527"
33+
REDROID_IMAGE="agentscope/redroid:internal"
1534

1635
if [ -z "$(docker images -q "$REDROID_IMAGE")" ]; then
1736
if [ -f /redroid.tar ]; then
1837
echo "--> Loading Redroid image from /redroid.tar..."
1938
docker load -i /redroid.tar
2039
echo "--> Successfully loaded Redroid image."
21-
rm /redroid.tar
40+
if [ "$RUN_MODE" = "NORMAL" ]; then
41+
echo "--> Normal mode: Removing /redroid.tar."
42+
rm /redroid.tar
43+
else # RUN_MODE is "HEALTH_CHECK"
44+
echo "--> Health check mode: Preserving /redroid.tar for commit."
45+
fi
2246
else
2347
echo "[FATAL ERROR] Built-in /redroid.tar not found!"
2448
exit 1
@@ -27,6 +51,11 @@ else
2751
echo "--> Redroid image already exists."
2852
fi
2953

54+
if [ -z "$(docker images -q "$REDROID_IMAGE")" ]; then
55+
echo "[FATAL ERROR] Failed to load Redroid image '$REDROID_IMAGE' from tarball."
56+
exit 1
57+
fi
58+
3059
if [ "$(docker ps -q -f name=redroid_nested)" ]; then
3160
echo "Nested redroid container is already running."
3261
else
@@ -105,5 +134,4 @@ echo "--> Nginx & FastAPI & WS-Scrcpy services started."
105134
supervisorctl status
106135

107136
echo "--> Orchestration complete. System is fully operational."
108-
wait $dockerd_pid
109-
137+
wait $dockerd_pid

0 commit comments

Comments
 (0)