-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathdocker-compose.jetson.yml
More file actions
98 lines (90 loc) · 2.92 KB
/
docker-compose.jetson.yml
File metadata and controls
98 lines (90 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# SPDX-FileCopyrightText: Copyright (c) 2024–2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause
# Compose project name (used by Compose as the prefix for project resources).
name: nemotron-voice-agent-jetson

# Named volumes shared by the services in this file.
volumes:
  # Mounted at /root/.cache by the llm-nvidia-jetson service so cached data
  # survives container re-creation. An explicit empty mapping selects the
  # default volume settings.
  nim_cache: {}
services:
  # =============================================================================
  # LLM Service
  # =============================================================================
  # Runs the vLLM OpenAI-compatible API server on the NVIDIA container runtime.
  # Container port 8000 is published on host port 9000.
  llm-nvidia-jetson:
    runtime: nvidia
    image: nvcr.io/nvidia/vllm:25.10-py3
    container_name: llm-nvidia-jetson
    env_file:
      - .env
    environment:
      - HF_TOKEN=${HF_TOKEN}
    ports:
      - "9000:8000"
    volumes:
      # Named volume keeps /root/.cache across container re-creation.
      - nim_cache:/root/.cache
    shm_size: 16GB
    restart: unless-stopped
    # The image entrypoint is replaced with a shell so the command below can
    # apply shell-style defaults to the environment variables it reads.
    entrypoint: ["/bin/bash", "-c"]
    command:
      # `$$` escapes Compose interpolation: the variables are expanded by bash
      # inside the container, not by Compose on the host.
      - |
        python3 -m vllm.entrypoints.openai.api_server \
          --model "$${NVIDIA_LLM_MODEL:-RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16}" \
          --host 0.0.0.0 \
          --port 8000 \
          --max-model-len 4096 \
          --gpu-memory-utilization "$${GPU_MEMORY_UTILIZATION:-0.15}" \
          --enforce-eager \
          --trust-remote-code
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 15s
      retries: 10
      # 20-minute grace period before failed probes count against `retries`
      # (presumably to cover model download and load; confirm on target device).
      start_period: 1200s
    logging:
      # Rotate container logs: at most 5 files of 50 MB each.
      driver: "json-file"
      options:
        max-size: "50m"
        max-file: "5"

  # =============================================================================
  # Voice Agent Application
  # =============================================================================
  # Built from ./Dockerfile and tagged with IMAGE_VERSION (default: dev).
  # Uses host networking, so the pipeline's port 7860 is bound on the host
  # directly and no `ports:` mapping is needed.
  python-app:
    build:
      context: .
      dockerfile: Dockerfile
    image: nvcr.io/nvidia/blueprint/nemotron-voice-agent:${IMAGE_VERSION:-dev}
    container_name: voice-agent-webrtc-jetson
    network_mode: host
    env_file:
      - .env
    environment:
      - NVIDIA_API_KEY=${NVIDIA_API_KEY}
    # Exec-form list with a literal block script: no nested quote escaping and
    # no dependence on folded-scalar line joining. `$$` defers variable
    # expansion to bash inside the container.
    command:
      - bash
      - "-c"
      - |
        # TRANSPORT selects the pipeline; anything other than WEBSOCKET
        # (including unset) runs the WebRTC pipeline.
        # `exec` replaces the bash wrapper with the pipeline process so that
        # stop signals sent to the container reach it directly.
        if [ "$${TRANSPORT:-WEBRTC}" = "WEBSOCKET" ]; then
          echo "Starting WebSocket pipeline..."
          exec uv run src/pipeline_websocket.py \
            --host 0.0.0.0 --port 7860 --workers "$${WORKERS:-1}"
        else
          echo "Starting WebRTC pipeline..."
          exec uv run src/pipeline.py \
            --host 0.0.0.0 --port 7860 --workers "$${WORKERS:-1}"
        fi
    volumes:
      - ./audio_dumps:/app/audio_dumps
      - ./config:/app/config
    restart: unless-stopped
    healthcheck:
      # -f makes curl exit non-zero on HTTP 4xx/5xx so an erroring server is
      # reported unhealthy; -sS hides progress output but keeps error messages;
      # -o /dev/null keeps the response body out of the health log.
      test:
        - CMD-SHELL
        - curl -fsS -o /dev/null http://localhost:7860/docs || exit 1
      interval: 30s
      timeout: 10s
      retries: 3

  # =============================================================================
  # UI Application
  # =============================================================================
  # Built from frontend/Dockerfile and tagged with IMAGE_VERSION (default: dev).
  # Container port 8000 is published on host port 8081.
  ui-app:
    build:
      context: .
      dockerfile: frontend/Dockerfile
    image: nvcr.io/nvidia/blueprint/nemotron-voice-agent-ui:${IMAGE_VERSION:-dev}
    container_name: webrtc-ui-jetson
    ports:
      - "8081:8000"
    restart: unless-stopped