Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ For deeper setup details, direct repo development, env files, Docker internals,

The first Google AI Search run now uses a shared persistent browser profile instead of a seeded browser profile committed into the repo.

For Docker and VPS installs without a real display, the API container can expose that browser session through noVNC at `https://your-vnc-host/vnc.html`.

1. Open `/playground/operators/google/ai-search`
2. Click `Build Cookies`
3. Let the shared browser open Google
Expand Down
16 changes: 14 additions & 2 deletions apps/api/src/services/scrape/BrowserService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ class BrowserService {
return Boolean(process.env.DISPLAY?.trim());
}

/**
 * Reports whether the process was started inside a managed virtual X display.
 *
 * The answer is `true` only on Linux and only when the
 * `HEADLESSX_VIRTUAL_DISPLAY_ACTIVE` environment variable, after trimming
 * surrounding whitespace, is exactly `'1'`.
 *
 * @returns `true` when the managed virtual display flag is set on Linux.
 */
private isManagedVirtualDisplay(): boolean {
  const onLinux = process.platform === 'linux';
  const activeFlag = process.env.HEADLESSX_VIRTUAL_DISPLAY_ACTIVE?.trim();

  return onLinux && activeFlag === '1';
}

private readCookieReadyMarker(): CookieReadyMarker | null {
try {
if (!fs.existsSync(this.cookieReadyMarkerPath)) {
Expand Down Expand Up @@ -349,7 +357,8 @@ class BrowserService {
if (options?.cookieBootstrap) {
this.cookieBootstrapActive = true;
this.cookieBootstrapStartedAt = new Date().toISOString();
this.cookieBootstrapUsingVirtualDisplay = launchMode === 'virtual';
this.cookieBootstrapUsingVirtualDisplay =
launchMode === 'virtual' || this.isManagedVirtualDisplay();
}

context.browser()?.once('disconnected', () => {
Expand Down Expand Up @@ -480,6 +489,7 @@ class BrowserService {
const running = this.cookieBootstrapActive && this.isContextReady(this.persistentContext);
const required = !ready;
const hasDisplay = this.hasSystemDisplay();
const usingManagedVirtualDisplay = this.isManagedVirtualDisplay();
const launchMode = running ? this.currentLaunchMode : null;

let message = 'Shared Google profile is ready for automated searches.';
Expand All @@ -498,7 +508,9 @@ class BrowserService {
running,
launchMode,
hasDisplay,
usingVirtualDisplay: running ? this.cookieBootstrapUsingVirtualDisplay : !hasDisplay,
usingVirtualDisplay: running
? this.cookieBootstrapUsingVirtualDisplay
: usingManagedVirtualDisplay || !hasDisplay,
activePages: this.activePages,
profileDir: this.profileDir,
usesSharedProfile: true,
Expand Down
7 changes: 7 additions & 0 deletions docs/setup-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,13 @@ Important:

Google AI Search now uses the shared persistent browser profile managed by the API.

For Docker and VPS installs, the API container can now host its own managed X display plus a noVNC web client.

- set `VNC_HOST` to publish the browser session through Traefik
- open `https://your-vnc-host/vnc.html`
- sign in with the password you set in `HEADLESSX_VNC_PASSWORD`
- leave `HEADLESSX_VNC_ALLOW_NO_PASSWORD=0` unless the route is private and temporary

The first time you use the Google operator:

1. Open `/playground/operators/google/ai-search`
Expand Down
17 changes: 17 additions & 0 deletions infra/docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ HTML_TO_MARKDOWN_HOST_PORT=38081
YT_ENGINE_HOST_PORT=38090
WEB_HOST_PORT=34872
API_HOST_PORT=38473
VNC_HOST_PORT=35900
VNC_WEB_HOST_PORT=36080

# Internal container listener ports
WEB_PORT=3000
Expand Down Expand Up @@ -49,3 +51,18 @@ INTERNAL_API_URL=http://api:8000

# Set this only when the dashboard is hosted on a custom origin.
# FRONTEND_URL=http://localhost:34872

# Managed browser display inside the API container.
# The API entrypoint reads these values; switches accept 1/true/yes/on.

# Start an Xvfb display from the entrypoint (skipped on non-Linux hosts).
HEADLESSX_ENABLE_MANAGED_DISPLAY=1
# Also start x11vnc plus the noVNC web client for the managed display.
HEADLESSX_ENABLE_VNC=1
# X display name used when DISPLAY is not already set.
HEADLESSX_DISPLAY=:99
# Screen geometry passed to Xvfb as WIDTHxHEIGHTxDEPTH.
HEADLESSX_DISPLAY_WIDTH=1440
HEADLESSX_DISPLAY_HEIGHT=900
HEADLESSX_DISPLAY_DEPTH=24
# Listener ports inside the container (x11vnc and the noVNC web client).
# The published host ports are VNC_HOST_PORT and VNC_WEB_HOST_PORT.
HEADLESSX_VNC_PORT=5900
HEADLESSX_VNC_WEB_PORT=6080
# Hostname used to publish the noVNC session through Traefik.
VNC_HOST=headlessx-vnc.localhost
# Printed by the entrypoint at startup as the public noVNC URL.
HEADLESSX_VNC_PUBLIC_URL=http://localhost:36080/vnc.html
# Required while VNC is enabled, unless HEADLESSX_VNC_ALLOW_NO_PASSWORD is on.
# Replace this placeholder before exposing the VNC ports.
HEADLESSX_VNC_PASSWORD=change-me
# Set to 1 only for trusted local-only debugging.
HEADLESSX_VNC_ALLOW_NO_PASSWORD=0
138 changes: 137 additions & 1 deletion infra/docker/api-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,137 @@
#!/bin/sh
set -eu

# PIDs of the background helpers started by this script. An empty value means
# the helper was never started, so cleanup_background skips it.
XVFB_PID=""
OPENBOX_PID=""
X11VNC_PID=""
NOVNC_PID=""
# PID of the API server; forward_signal stops this process on INT/TERM/HUP.
API_PID=""
# Path of the temporary x11vnc password file; removed by cleanup_background.
VNC_PASSWORD_FILE=""

# Return 0 when the first argument is a truthy switch value, 1 otherwise.
#
# Accepted spellings are 1, true, yes and on, matched case-insensitively so
# that values such as "True" or "Yes" behave the same as "true" / "yes".
# A missing or empty argument counts as disabled.
is_enabled() {
  case "${1:-}" in
    1|[Tt][Rr][Uu][Ee]|[Yy][Ee][Ss]|[Oo][Nn])
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}

# Tear down the display helpers and remove the temporary VNC password file.
#
# Helpers are stopped in the order noVNC, x11vnc, openbox, Xvfb. Every step is
# best-effort: helpers that were never started or have already exited are
# skipped silently.
cleanup_background() {
  for child_pid in "$NOVNC_PID" "$X11VNC_PID" "$OPENBOX_PID" "$XVFB_PID"; do
    # An empty slot means that helper was never launched.
    if [ -z "$child_pid" ]; then
      continue
    fi

    kill "$child_pid" 2>/dev/null || true
    wait "$child_pid" 2>/dev/null || true
  done

  if [ -n "$VNC_PASSWORD_FILE" ]; then
    if [ -f "$VNC_PASSWORD_FILE" ]; then
      rm -f "$VNC_PASSWORD_FILE"
    fi
  fi
}

# Handler for INT/TERM/HUP.
#
# When the API server is running, stop it and wait for it to finish; the main
# flow then ends the script with the API's wait status.
#
# When no API process exists yet (display bring-up or database migrations are
# still in progress) there is nothing to forward to. Returning silently would
# swallow the shutdown request and let the script carry on, so exit instead;
# the EXIT trap then tears down the display helpers. 143 (128 + SIGTERM) is
# used because the handler is not told which of the three signals arrived.
forward_signal() {
  if [ -z "$API_PID" ]; then
    exit 143
  fi

  kill "$API_PID" 2>/dev/null || true
  wait "$API_PID" 2>/dev/null || true
}

# Block until the X server on $DISPLAY answers xdpyinfo.
#
# Polls once per second. Returns 0 as soon as the display responds, and 1 when
# either 30 probes have failed or the Xvfb process recorded in XVFB_PID has
# already exited.
wait_for_display() {
  ATTEMPTS=0

  while ! xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; do
    ATTEMPTS=$((ATTEMPTS + 1))

    # Give up after 30 failed probes.
    if [ "$ATTEMPTS" -ge 30 ]; then
      echo "❌ Managed display ${DISPLAY} did not become ready."
      return 1
    fi

    # No point in polling further once the Xvfb process itself is gone.
    if [ -n "$XVFB_PID" ]; then
      if ! kill -0 "$XVFB_PID" 2>/dev/null; then
        echo "❌ Xvfb exited before ${DISPLAY} became ready."
        return 1
      fi
    fi

    sleep 1
  done

  return 0
}

# Start x11vnc on the managed display and serve it through the noVNC web client.
#
# Reads HEADLESSX_VNC_PORT (default 5900), HEADLESSX_VNC_WEB_PORT (default
# 6080), HEADLESSX_VNC_PASSWORD, HEADLESSX_VNC_ALLOW_NO_PASSWORD and
# HEADLESSX_VNC_PUBLIC_URL. Records the helper PIDs in X11VNC_PID / NOVNC_PID
# and the password file path in VNC_PASSWORD_FILE.
#
# Returns 1 when no password is configured and password-less access was not
# explicitly allowed. If a helper dies during startup a warning naming its log
# file is printed and the "available" banner is suppressed; the function still
# returns 0 so the API can start without remote viewing.
start_vnc_stack() {
  VNC_PORT="${HEADLESSX_VNC_PORT:-5900}"
  VNC_WEB_PORT="${HEADLESSX_VNC_WEB_PORT:-6080}"

  if [ -z "${HEADLESSX_VNC_PASSWORD:-}" ] && ! is_enabled "${HEADLESSX_VNC_ALLOW_NO_PASSWORD:-0}"; then
    echo "❌ HEADLESSX_VNC_PASSWORD must be set when HEADLESSX_ENABLE_VNC=1."
    echo " Set HEADLESSX_VNC_ALLOW_NO_PASSWORD=1 only for trusted local-only debugging."
    return 1
  fi

  # Build the x11vnc command line once; only the auth flag differs per mode.
  set -- -display "$DISPLAY" -rfbport "$VNC_PORT" -forever -shared -xkb -noxrecord -noxfixes -noxdamage

  if [ -n "${HEADLESSX_VNC_PASSWORD:-}" ]; then
    VNC_PASSWORD_FILE="$(mktemp /tmp/headlessx-vnc-pass.XXXXXX)"
    x11vnc -storepasswd "$HEADLESSX_VNC_PASSWORD" "$VNC_PASSWORD_FILE" >/dev/null
    set -- "$@" -rfbauth "$VNC_PASSWORD_FILE"
  else
    echo "⚠️ HEADLESSX_VNC_PASSWORD is empty. The VNC session is not password protected."
    set -- "$@" -nopw
  fi

  x11vnc "$@" >/tmp/headlessx-x11vnc.log 2>&1 &
  X11VNC_PID=$!

  websockify --web /usr/share/novnc/ "$VNC_WEB_PORT" "127.0.0.1:${VNC_PORT}" >/tmp/headlessx-novnc.log 2>&1 &
  NOVNC_PID=$!

  # Both helpers run in the background, so a launch failure (port already
  # bound, display unreachable) is invisible here. Give them a moment to fail
  # fast before announcing that the session is reachable.
  sleep 1

  VNC_STACK_OK=1
  if ! kill -0 "$X11VNC_PID" 2>/dev/null; then
    echo "⚠️ x11vnc exited during startup. See /tmp/headlessx-x11vnc.log." >&2
    VNC_STACK_OK=0
  fi
  if ! kill -0 "$NOVNC_PID" 2>/dev/null; then
    echo "⚠️ websockify exited during startup. See /tmp/headlessx-novnc.log." >&2
    VNC_STACK_OK=0
  fi
  if [ "$VNC_STACK_OK" -ne 1 ]; then
    return 0
  fi

  echo "✅ noVNC available at http://127.0.0.1:${VNC_WEB_PORT}/vnc.html"
  if [ -n "${HEADLESSX_VNC_PUBLIC_URL:-}" ]; then
    echo "🌐 Public noVNC URL: ${HEADLESSX_VNC_PUBLIC_URL}"
  fi
}

# Bring up the X display the browser renders into.
#
# Does nothing on non-Linux systems or when HEADLESSX_ENABLE_MANAGED_DISPLAY is
# off. Otherwise it exports DISPLAY (existing value, else HEADLESSX_DISPLAY,
# else :99) and either reuses a display that already answers xdpyinfo or
# starts Xvfb, an optional openbox window manager and, when
# HEADLESSX_ENABLE_VNC is on, the VNC stack. HEADLESSX_VIRTUAL_DISPLAY_ACTIVE=1
# is exported once a usable display exists.
start_managed_display() {
  if [ "$(uname -s)" != "Linux" ]; then
    return 0
  fi

  if ! is_enabled "${HEADLESSX_ENABLE_MANAGED_DISPLAY:-1}"; then
    return 0
  fi

  export DISPLAY="${DISPLAY:-${HEADLESSX_DISPLAY:-:99}}"

  if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then
    export HEADLESSX_VIRTUAL_DISPLAY_ACTIVE=1
    echo "🖥️ Reusing existing display ${DISPLAY}"
    return 0
  fi

  # Nothing answers on this display, so any lock file left behind by an
  # unclean stop (container killed, then restarted with the same filesystem)
  # is stale. Xvfb refuses to start while it exists, so remove it first.
  # Only plain local displays of the form ":N" or ":N.S" are handled.
  case "$DISPLAY" in
    :*)
      DISPLAY_NUMBER="${DISPLAY#:}"
      DISPLAY_NUMBER="${DISPLAY_NUMBER%%.*}"
      case "$DISPLAY_NUMBER" in
        ''|*[!0-9]*)
          ;;
        *)
          rm -f "/tmp/.X${DISPLAY_NUMBER}-lock"
          ;;
      esac
      ;;
  esac

  # Geometry falls back to the browser window size, then to 1440x900x24.
  DISPLAY_WIDTH="${HEADLESSX_DISPLAY_WIDTH:-${BROWSER_WINDOW_WIDTH:-1440}}"
  DISPLAY_HEIGHT="${HEADLESSX_DISPLAY_HEIGHT:-${BROWSER_WINDOW_HEIGHT:-900}}"
  DISPLAY_DEPTH="${HEADLESSX_DISPLAY_DEPTH:-24}"

  echo "🖥️ Starting managed display ${DISPLAY} (${DISPLAY_WIDTH}x${DISPLAY_HEIGHT}x${DISPLAY_DEPTH})"
  Xvfb "$DISPLAY" -screen 0 "${DISPLAY_WIDTH}x${DISPLAY_HEIGHT}x${DISPLAY_DEPTH}" -ac -nolisten tcp +extension RANDR >/tmp/headlessx-xvfb.log 2>&1 &
  XVFB_PID=$!

  wait_for_display
  export HEADLESSX_VIRTUAL_DISPLAY_ACTIVE=1

  # The window manager is optional; run it only when it is installed.
  if command -v openbox >/dev/null 2>&1; then
    openbox >/tmp/headlessx-openbox.log 2>&1 &
    OPENBOX_PID=$!
  fi

  if is_enabled "${HEADLESSX_ENABLE_VNC:-1}"; then
    start_vnc_stack
  fi
}

# On INT/TERM/HUP, run forward_signal, which stops the API process if one is
# running.
trap 'forward_signal' INT TERM HUP
# However the script ends, stop the display helpers and delete the temporary
# VNC password file.
trap 'cleanup_background' EXIT

# Bring up the display stack before database migrations and the API start.
start_managed_display

cd /app/apps/api

MAX_ATTEMPTS="${PRISMA_MIGRATE_MAX_ATTEMPTS:-10}"
Expand All @@ -20,4 +151,9 @@ until pnpm exec prisma migrate deploy; do
done

echo "✅ Prisma migrations applied."
exec pnpm exec tsx src/server_entry.ts
# Run the API as a background child instead of exec-ing it, so this shell
# stays alive to run the signal and EXIT traps.
pnpm exec tsx src/server_entry.ts &
API_PID=$!

# Capture the wait status explicitly: with `set -e` active, a bare failing
# `wait` would end the script before the status could be stored.
EXIT_CODE=0
wait "$API_PID" || EXIT_CODE=$?

# The API is gone; clear the PID so nothing tries to signal it again.
API_PID=""
exit "$EXIT_CODE"
6 changes: 6 additions & 0 deletions infra/docker/api.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ RUN corepack enable
RUN apt-get update && apt-get install -y \
build-essential \
xvfb \
x11vnc \
novnc \
websockify \
openbox \
x11-utils \
xauth \
libnss3 \
libnspr4 \
libatk1.0-0 \
Expand Down
14 changes: 14 additions & 0 deletions infra/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,27 @@ services:
- YT_ENGINE_TIMEOUT_MS=${YT_ENGINE_TIMEOUT_MS:-45000}
- BROWSER_WINDOW_WIDTH=${BROWSER_WINDOW_WIDTH:-}
- BROWSER_WINDOW_HEIGHT=${BROWSER_WINDOW_HEIGHT:-}
- DISPLAY=${DISPLAY:-${HEADLESSX_DISPLAY:-:99}}
- HEADLESSX_ENABLE_MANAGED_DISPLAY=${HEADLESSX_ENABLE_MANAGED_DISPLAY:-1}
- HEADLESSX_ENABLE_VNC=${HEADLESSX_ENABLE_VNC:-1}
- HEADLESSX_DISPLAY=${HEADLESSX_DISPLAY:-:99}
- HEADLESSX_DISPLAY_WIDTH=${HEADLESSX_DISPLAY_WIDTH:-1440}
- HEADLESSX_DISPLAY_HEIGHT=${HEADLESSX_DISPLAY_HEIGHT:-900}
- HEADLESSX_DISPLAY_DEPTH=${HEADLESSX_DISPLAY_DEPTH:-24}
- HEADLESSX_VNC_PORT=${HEADLESSX_VNC_PORT:-5900}
- HEADLESSX_VNC_WEB_PORT=${HEADLESSX_VNC_WEB_PORT:-6080}
- HEADLESSX_VNC_PASSWORD=${HEADLESSX_VNC_PASSWORD:-}
- HEADLESSX_VNC_ALLOW_NO_PASSWORD=${HEADLESSX_VNC_ALLOW_NO_PASSWORD:-0}
- HEADLESSX_VNC_PUBLIC_URL=${HEADLESSX_VNC_PUBLIC_URL:-http://localhost:36080/vnc.html}
- BULLMQ_QUEUE_NAME=${BULLMQ_QUEUE_NAME:-headlessx-jobs}
- QUEUE_WORKER_CONCURRENCY=${QUEUE_WORKER_CONCURRENCY:-2}
- QUEUE_JOB_ATTEMPTS=${QUEUE_JOB_ATTEMPTS:-3}
- QUEUE_JOB_BACKOFF_MS=${QUEUE_JOB_BACKOFF_MS:-5000}
- QUEUE_STREAM_POLL_MS=${QUEUE_STREAM_POLL_MS:-1000}
ports:
- "${API_HOST_PORT:-38473}:${PORT:-8000}"
- "${VNC_HOST_PORT:-35900}:${HEADLESSX_VNC_PORT:-5900}"
- "${VNC_WEB_HOST_PORT:-36080}:${HEADLESSX_VNC_WEB_PORT:-6080}"
healthcheck:
test:
[
Expand Down