-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspawn-headless-browser.sh
More file actions
executable file
·256 lines (208 loc) · 7.42 KB
/
spawn-headless-browser.sh
File metadata and controls
executable file
·256 lines (208 loc) · 7.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#!/bin/bash
#
# spawn-headless-browser.sh
# Spawns a VM with a headless browser and returns a CDP URL for remote control
#
# Uses nginx to proxy Chrome's CDP endpoint (which only listens on localhost)
# to an externally accessible port. Nginx MUST listen on [::] (IPv6) for vers routing.
#
# Final URL: wss://<vm_id>.vm.vers.sh/devtools/browser/<id>
#
# Abort on the first unhandled command failure.
set -o errexit

# VM sizing: memory and filesystem sizes are in MiB (they mirror the
# mem_size_mib / fs_size_vm_mib keys written to vers.toml further down).
VCPU_COUNT=2
MEM_SIZE=2048
FS_SIZE=8192

# Resolve the directory holding this script and work from there, so that
# vers.toml is written next to the script regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
#######################################
# Run a command inside a VM via `vers execute`, retrying on failure.
# Arguments:
#   $1    - VM ID handed to `vers execute`
#   $2    - per-attempt timeout, handed to `vers execute -t`
#   $3... - command (and its arguments) to run inside the VM
# Outputs:
#   Nothing on stdout. The combined stdout/stderr of every attempt is
#   written to stderr, since vers prints status messages to stdout.
# Returns:
#   0 as soon as one attempt succeeds, 1 after 5 failed attempts.
#######################################
retry_execute() {
  local vm_id=$1
  local timeout=$2
  shift 2
  local max_retries=5
  local retry_delay=10
  local attempt=0
  local output

  while [ "$attempt" -lt "$max_retries" ]; do
    attempt=$((attempt + 1))
    # Capture both stdout and stderr; the exit code decides success
    if output=$(vers execute "$vm_id" -t "$timeout" -- "$@" 2>&1); then
      printf '%s\n' "$output" >&2
      return 0
    fi
    printf '%s\n' "$output" >&2
    # Only announce a retry and back off when another attempt will follow;
    # sleeping after the final failure would just delay the error.
    if [ "$attempt" -lt "$max_retries" ]; then
      echo " Retry $attempt/$max_retries..." >&2
      sleep "$retry_delay"
    fi
  done
  echo " Giving up after $max_retries attempts" >&2
  return 1
}
# Create vers.toml with default rootfs.
# The heredoc delimiter is intentionally unquoted so the [machine] sizes are
# taken from MEM_SIZE / VCPU_COUNT / FS_SIZE instead of being hard-coded a
# second time; this keeps the file in step with the values passed to
# `vers build` and `vers run`.
cat > vers.toml << EOF
[machine]
mem_size_mib = ${MEM_SIZE}
vcpu_count = ${VCPU_COUNT}
fs_size_vm_mib = ${FS_SIZE}

[rootfs]
name = "default"

[builder]
name = "none"
dockerfile = "Dockerfile"

[kernel]
name = "default.bin"
EOF
# Build (uploads context). All vers output goes to stderr.
echo "Building VM image..." >&2
vers build --fs-size "$FS_SIZE" --mem "$MEM_SIZE" --vcpu "$VCPU_COUNT" >&2

# Start the VM and capture the VM ID.
echo "Starting VM..." >&2
# Keep the full `vers run` output and echo it to stderr so a failed start is
# diagnosable. `|| true` defers the failure decision to the VM ID checks
# below instead of letting `set -e` abort here without any message.
RUN_OUTPUT=$(vers run --mem-size "$MEM_SIZE" --vcpu-count "$VCPU_COUNT" --fs-size-vm "$FS_SIZE" 2>&1) || true
printf '%s\n' "$RUN_OUTPUT" >&2

# The VM ID is taken to be the first 36-character run of hex digits/dashes.
VM_ID=$(printf '%s\n' "$RUN_OUTPUT" | grep -oE '[a-f0-9-]{36}' | head -1) || true

if [ -z "$VM_ID" ]; then
  # Fall back to asking vers for the current head. `|| true` is required:
  # under `set -e` a failing `vers head` would otherwise terminate the script
  # before the explicit error message below could be printed.
  VM_ID=$(vers head 2>/dev/null) || true
fi

if [ -z "$VM_ID" ]; then
  echo "Error: Could not determine VM ID" >&2
  exit 1
fi
echo "VM ID: $VM_ID" >&2
# Wait for VM to be ready with retries (use longer timeout for initial connection).
# Probes up to 30 times, 5 seconds apart, and stops with an error if the VM
# never answers, rather than continuing on to the install steps.
echo "Waiting for VM to be ready..." >&2
VM_READY=false
for i in {1..30}; do
  # `>&2` keeps the probe's output off stdout, which carries only the CDP URL
  if vers execute "$VM_ID" -t 60 -- echo ready >&2; then
    echo "VM is ready" >&2
    VM_READY=true
    break
  fi
  echo " Waiting... ($i/30)" >&2
  sleep 5
done

if [ "$VM_READY" != true ]; then
  echo "Error: VM $VM_ID did not become ready after 30 attempts" >&2
  exit 1
fi
# Provision the VM in two steps: system packages first, then the Node.js app.
printf '%s\n' "Installing dependencies (this may take a few minutes)..." >&2

# Remote command: non-interactive apt run that keeps existing config files
# (--force-confdef / --force-confold) and installs the libraries listed below
# together with xvfb, nodejs, npm, nginx and curl.
APT_INSTALL_CMD='export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" install \
libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 \
libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 \
libgbm1 libasound2t64 libpango-1.0-0 libcairo2 fonts-liberation \
xvfb nodejs npm nginx curl ca-certificates'
retry_execute "$VM_ID" 600 bash -c "$APT_INSTALL_CMD" >&2

# Remote command: create /app with a package.json that depends on puppeteer
# and run npm install there.
printf '%s\n' "Installing puppeteer..." >&2
PUPPETEER_INSTALL_CMD='mkdir -p /app && cd /app && echo "{\"dependencies\":{\"puppeteer\":\"^22.0.0\"}}" > package.json && npm install'
retry_execute "$VM_ID" 300 bash -c "$PUPPETEER_INSTALL_CMD" >&2
# Create browser-server.js using base64 encoding.
# The script is base64-encoded locally so it can be embedded in a remote
# `bash -c` string without any quoting problems. Newlines are stripped from
# the encoded text because some base64 implementations wrap their output,
# and a single-line payload is safer to splice into the remote command.
BROWSER_JS=$(base64 << 'JSEOF' | tr -d '\n'
const puppeteer = require("puppeteer");
const fs = require("fs");

(async () => {
  console.log("Launching browser...");
  const browser = await puppeteer.launch({
    headless: "new",
    timeout: 120000,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-gpu",
      // CDP is bound to localhost only; nginx proxies it to the outside
      "--remote-debugging-port=9222",
      "--remote-debugging-address=127.0.0.1",
    ],
  });

  const wsEndpoint = browser.wsEndpoint();
  const wsPath = new URL(wsEndpoint).pathname;
  console.log("Browser WS endpoint:", wsEndpoint);
  console.log("WS path:", wsPath);

  // Record the endpoint on disk for later inspection
  const info = { wsEndpoint, wsPath };
  fs.writeFileSync("/tmp/browser-info.json", JSON.stringify(info));
  console.log("BROWSER_READY");

  process.on("SIGTERM", async () => {
    await browser.close();
    process.exit(0);
  });

  // Keep alive
  setInterval(() => {}, 60000);
})();
JSEOF
)
# Decode the payload inside the VM and write it to /app/browser-server.js
retry_execute "$VM_ID" 60 bash -c "echo '$BROWSER_JS' | base64 -d > /app/browser-server.js"
# Create nginx config - MUST listen on [::] for vers to route traffic.
# The config proxies everything to Chrome's CDP port on 127.0.0.1:9222 and
# adds WebSocket upgrade headers for /devtools paths. The heredoc delimiter is
# quoted so nginx variables such as $http_upgrade are not expanded by the
# shell. Newlines are stripped from the base64 text because some base64
# implementations wrap their output, and a single-line payload is safer to
# splice into the remote command.
NGINX_CONF=$(base64 << 'NGINXEOF' | tr -d '\n'
worker_processes 1;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events { worker_connections 1024; }
http {
  map $http_upgrade $connection_upgrade {
    default upgrade;
    '' close;
  }
  server {
    listen [::]:80 ipv6only=off;
    location / {
      proxy_pass http://127.0.0.1:9222;
      proxy_http_version 1.1;
      proxy_set_header Host localhost:9222;
    }
    location ~ ^/devtools {
      proxy_pass http://127.0.0.1:9222;
      proxy_http_version 1.1;
      proxy_set_header Upgrade $http_upgrade;
      proxy_set_header Connection $connection_upgrade;
      proxy_set_header Host localhost:9222;
      proxy_read_timeout 86400;
    }
  }
}
NGINXEOF
)
# Decode the payload inside the VM and install it as the main nginx config
retry_execute "$VM_ID" 60 bash -c "echo '$NGINX_CONF' | base64 -d > /etc/nginx/nginx.conf"
# Start services
echo "Starting browser services..." >&2

# Give VM time to settle after heavy installations
echo "Waiting for VM to settle..." >&2
sleep 5

# Start Xvfb with longer timeout. Best effort: a failure here is tolerated
# (`|| true`) and does not stop the script.
echo "Starting Xvfb..." >&2
retry_execute "$VM_ID" 120 bash -c 'pgrep Xvfb || (Xvfb :99 -screen 0 1920x1080x24 &); sleep 2; pgrep Xvfb && echo "Xvfb started"' || true

# Start browser with longer timeout. The node process is backgrounded inside
# the VM and logs to /tmp/browser.log.
echo "Starting browser..." >&2
retry_execute "$VM_ID" 180 bash -c 'cd /app && DISPLAY=:99 nohup node browser-server.js > /tmp/browser.log 2>&1 & sleep 3; echo "Browser process started"'
echo "Waiting for browser to initialize..." >&2
sleep 30

# Start nginx (stop first in case it's already running)
retry_execute "$VM_ID" 30 bash -c 'nginx -s stop 2>/dev/null || true; nginx'

# Verify nginx is actually running; after each failed check, try to start it
# again before the next check.
echo "Verifying nginx started..." >&2
NGINX_RUNNING=false
for i in {1..5}; do
  if vers execute "$VM_ID" -t 30 -- pgrep nginx >&2; then
    echo "nginx is running" >&2
    NGINX_RUNNING=true
    break
  fi
  echo " Waiting for nginx... ($i/5)" >&2
  sleep 2
  # Try starting nginx again
  vers execute "$VM_ID" -t 30 -- nginx >&2 || true
done

# Surface the outcome instead of ending the loop silently. This is only a
# warning because the last restart attempt above is never re-checked.
if [ "$NGINX_RUNNING" != true ]; then
  echo "Warning: nginx could not be confirmed running after 5 checks" >&2
fi
sleep 3
# Verify browser is running by checking the external endpoint.
# Polls /json/version up to 15 times, 3 seconds apart. The last response is
# kept so the WebSocket URL is extracted from the same payload that passed
# the check, instead of issuing a second request.
echo "Verifying browser is accessible..." >&2
VERSION_URL="https://${VM_ID}.vm.vers.sh/json/version"
VERSION_JSON=""
for i in {1..15}; do
  # --max-time stops a stalled connection from hanging the script forever;
  # a curl failure simply counts as "not ready yet".
  VERSION_JSON=$(curl -s --max-time 10 "$VERSION_URL") || VERSION_JSON=""
  if [[ "$VERSION_JSON" == *Browser* ]]; then
    break
  fi
  echo " Waiting for browser... ($i/15)" >&2
  sleep 3
done

# Get the WebSocket URL (use tr to handle multi-line JSON), then rewrite the
# internal ws://localhost:9222 origin to the VM's external wss:// host.
WS_URL=$(printf '%s' "$VERSION_JSON" \
  | tr -d '\n ' \
  | grep -o '"webSocketDebuggerUrl":"[^"]*"' \
  | cut -d'"' -f4 \
  | sed "s|ws://localhost:9222|wss://${VM_ID}.vm.vers.sh|") || true

# NOTE(review): when no URL could be obtained, the script deliberately falls
# back to a placeholder URL with a warning and still exits 0. Confirm that
# callers parsing stdout expect this.
if [[ -z "$WS_URL" || "$WS_URL" == *localhost* ]]; then
  echo "Warning: Could not get WebSocket URL, browser may not be ready" >&2
  WS_URL="wss://${VM_ID}.vm.vers.sh/devtools/browser/unknown"
fi

# Validate the URL looks correct before outputting
WS_URL_PATTERN='^wss://[a-f0-9-]+\.vm\.vers\.sh/devtools/browser/'
if [[ ! "$WS_URL" =~ $WS_URL_PATTERN ]]; then
  echo "Error: Invalid WebSocket URL generated: $WS_URL" >&2
  exit 1
fi

# Output the CDP URL (this is the only stdout output)
printf '%s\n' "$WS_URL"

# Output helpful info to stderr
echo "" >&2
echo "=== Headless Browser Ready ===" >&2
echo "VM ID: $VM_ID" >&2
echo "HTTP: https://${VM_ID}.vm.vers.sh/json/version" >&2
echo "To stop: vers delete -y $VM_ID" >&2