-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathenv.example
More file actions
231 lines (178 loc) · 9.71 KB
/
env.example
File metadata and controls
231 lines (178 loc) · 9.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# ============================================================================
# NEMOTRON VOICE AGENT - ENVIRONMENT CONFIGURATION
# ============================================================================
# Copy this file to .env and configure the values for your deployment.
# Variables with values are defaults; commented variables are optional overrides.
# ----------------------------------------------------------------------------
# REQUIRED CREDENTIALS
# ----------------------------------------------------------------------------
# Your NVIDIA API key from https://build.nvidia.com
# Export it as an environment variable in your shell:
#   export NVIDIA_API_KEY=<your-nvidia-api-key>
# Docker Compose automatically falls back to the shell environment variable
# when this value is left empty.
NVIDIA_API_KEY=
# ----------------------------------------------------------------------------
# TURN SERVER CREDENTIALS
# ----------------------------------------------------------------------------
# Credentials for a TURN relay server (used by the WEBRTC transport to traverse
# restrictive NATs/firewalls). Leave empty if no TURN relay is needed.
TURN_SERVER_URL=
TURN_USERNAME=
TURN_PASSWORD=
# ----------------------------------------------------------------------------
# DOCKER IMAGE CONFIGURATION
# ----------------------------------------------------------------------------
# Version tag applied to the python-app and ui container images
IMAGE_VERSION=1.0.0
# ----------------------------------------------------------------------------
# PIPELINE CONFIGURATION
# ----------------------------------------------------------------------------
# Transport mode for the voice agent: "WEBSOCKET" or "WEBRTC"
#   WEBSOCKET: uses FastAPI WebSocket transport (pipeline_websocket.py)
#   WEBRTC:    uses WebRTC transport (pipeline.py) - default
TRANSPORT=WEBRTC
# Path to the prompt catalog YAML file containing system prompts
PROMPT_FILE_PATH=./config/prompt.yaml
# Enable speculative speech processing for lower response latency.
# When enabled, the bot starts generating responses before the user finishes speaking.
ENABLE_SPECULATIVE_SPEECH=true
# Voice Activity Detection (VAD) engine: "ASR" (recommended) or "Silero"
VAD_PROFILE=ASR
# Maximum number of conversation turns to retain in context.
# For multilingual or emotion-aware use cases, set the limit to 3-5 for best accuracy.
CHAT_HISTORY_LIMIT=20
# Enable multilingual mode (requires compatible ASR/TTS models).
# When true, set SYSTEM_PROMPT_SELECTOR to:
#   llama-3.3-nemotron-super-49b-v1.5/multilingual_voice_assistant
ENABLE_MULTILINGUAL=false
# Audio dump configuration for debugging and analysis.
# Enable to save raw audio streams to disk (disabled by default).
ENABLE_ASR_AUDIO_DUMP=false
ENABLE_TTS_AUDIO_DUMP=false
# Directory where audio dumps are saved (only used when dumps are enabled).
# NOTE: if Docker creates this folder with different user permissions, accessing
# it later from another Docker container or a Python deployment may cause
# permission errors. To fix:
#   1. Pre-create the folder before enabling: mkdir -p ./audio_dumps
#   2. Or fix permissions: sudo chown -R $(id -u):$(id -g) ./audio_dumps
AUDIO_DUMP_PATH=./audio_dumps
# JSON file containing word-to-IPA mappings for pronunciation correction
TTS_IPA_FILE_PATH=./config/ipa.json
# Number of 10ms audio chunks to buffer for output (controls audio latency).
#   WebRTC default:   5 chunks (50ms buffer) - optimized for low latency
#   WebSocket:        10 chunks (100ms buffer) - more stable under network variations
#   High concurrency: 10-40 chunks (100-400ms buffer) - prevents audio glitches under load
AUDIO_OUT_10MS_CHUNKS=5
# Number of worker processes for the HTTP server (handles concurrent connections)
WORKERS=4
# ----------------------------------------------------------------------------
# OPENTELEMETRY TRACING
# ----------------------------------------------------------------------------
ENABLE_TRACING=false
OTEL_CONSOLE_EXPORT=false
# Endpoint of the OTEL collector (uncomment to enable export).
# For a local Phoenix deployment: `docker run -p 6006:6006 -p 4317:4317 -i -t arizephoenix/phoenix:latest`
#   For gRPC (port 4317): use host:port format (e.g., localhost:4317 or phoenix:4317)
#   For HTTP (port 4318): use http://host:port format (e.g., http://localhost:4318)
#OTEL_EXPORTER_OTLP_ENDPOINT=phoenix:4317
# ----------------------------------------------------------------------------
# ASR (AUTOMATIC SPEECH RECOGNITION) CONFIGURATION
# ----------------------------------------------------------------------------
# Docker image for the ASR NIM container
ASR_DOCKER_IMAGE=nvcr.io/nim/nvidia/parakeet-1-1b-ctc-en-us:1.4.0
# Custom ASR endpoint URL (uncomment to override the Docker service).
# Example: grpc.nvcf.nvidia.com:443 (cloud) or localhost:50152 (local NIM)
#ASR_SERVER_URL=
# Cloud ASR function ID (required for cloud NIMs only).
#   Non-multilingual: 1598d209-5e27-4d3c-8079-4751568b1081
#   Multilingual:     71203149-d3b7-4460-8231-1be2543a1fca
ASR_CLOUD_FUNCTION_ID=1598d209-5e27-4d3c-8079-4751568b1081
# ASR model identifier
ASR_MODEL_NAME=parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer
# Docker Compose specific: model configuration tags
ASR_NIM_TAGS=mode=str,vad=silero
# ----------------------------------------------------------------------------
# TTS (TEXT-TO-SPEECH) CONFIGURATION
# ----------------------------------------------------------------------------
# Docker image for the TTS NIM container
TTS_DOCKER_IMAGE=nvcr.io/nim/nvidia/magpie-tts-multilingual:1.6.0
# Custom TTS endpoint URL (uncomment to override the Docker service).
# Example: grpc.nvcf.nvidia.com:443 (cloud) or localhost:50151 (local NIM)
#TTS_SERVER_URL=
# Default voice identifier (format: Model.Language.VoiceName)
TTS_VOICE_ID=Magpie-Multilingual.EN-US.Aria
# TTS model identifier
TTS_MODEL_NAME=magpie_tts_ensemble-Magpie-Multilingual
# Language code for speech synthesis
TTS_LANGUAGE=en-US
# Enable the TTS text filter that cleans special or unsupported characters for
# Magpie TTS; tune the cleaning rules by creating a custom text filter.
# Only applies when TTS_LANGUAGE=en-US and ENABLE_MULTILINGUAL=false;
# automatically disabled for other languages.
ENABLE_TTS_TEXT_FILTER=true
# Docker Compose specific: model configuration tags
TTS_NIM_TAGS=name=magpie-tts-multilingual,batch_size=32
# ---- Zero-shot TTS Magpie Model (uncomment ONLY if using zero-shot TTS Magpie) ----
# These override TTS_VOICE_ID / TTS_MODEL_NAME / TTS_NIM_TAGS above.
## Path to reference audio file for zero-shot voice cloning
#ZERO_SHOT_AUDIO_PROMPT=
## Voice ID for zero-shot TTS model
#TTS_VOICE_ID=Magpie-ZeroShot.Female-1
## TTS model ID for zero-shot
#TTS_MODEL_NAME=magpie_tts_ensemble-Magpie-ZeroShot
## Docker Compose specific: model configuration tags
#TTS_NIM_TAGS=name=magpie-tts-zeroshot,batch_size=32
# ============================================================================
# LLM (LARGE LANGUAGE MODEL) CONFIGURATION
# ============================================================================
# Select ONE model configuration below by uncommenting the appropriate block
# (and commenting out the active one). Each model has different capabilities
# and resource requirements.
# ----------------------------------------------------------------------------
# OPTION 1: Nemotron-3-Nano (active default)
# ----------------------------------------------------------------------------
NVIDIA_LLM_IMAGE=nvcr.io/nim/nvidia/nemotron-3-nano:1.7.0-variant
# Custom LLM endpoint URL (uncomment to override the Docker service).
# Example: https://integrate.api.nvidia.com/v1 (cloud) or http://localhost:8000/v1 (local NIM)
#NVIDIA_LLM_URL=
# Use nvidia/nemotron-3-nano-30b-a3b as the model name for the cloud endpoint (NVCF).
NVIDIA_LLM_MODEL=nvidia/nemotron-3-nano
TEMPERATURE=1.0
TOP_P=1.0
ENABLE_THINKING=false
# Maximum tokens for the LLM response (use 8192 when thinking is enabled, 2048 otherwise)
MAX_TOKENS=2048
NIM_ENABLE_BUDGET_CONTROL=1
NIM_ENABLE_KV_CACHE_REUSE=1
SYSTEM_PROMPT_SELECTOR=nemotron-3-nano/generic_voice_assistant
# ----------------------------------------------------------------------------
# OPTION 2: Llama-3.3-Nemotron-Super-49B
# ----------------------------------------------------------------------------
# Powerful model for complex reasoning and multilingual support.
# Recommended for: multilingual mode, advanced conversations.
# NVIDIA_LLM_IMAGE=nvcr.io/nim/nvidia/llama-3.3-nemotron-super-49b-v1.5:1.15.4
# # Custom LLM endpoint URL (uncomment to override Docker service)
# # Example: https://integrate.api.nvidia.com/v1 (cloud) or http://localhost:8000/v1 (local NIM)
# #NVIDIA_LLM_URL=
# NVIDIA_LLM_MODEL=nvidia/llama-3.3-nemotron-super-49b-v1.5
# TEMPERATURE=0
# TOP_P=1.0
# NIM_ENABLE_KV_CACHE_REUSE=1
# SYSTEM_PROMPT_SELECTOR=llama-3.3-nemotron-super-49b-v1.5/generic_voice_assistant
# # For a multilingual voice agent, set ENABLE_MULTILINGUAL=true and use the selector below:
# #SYSTEM_PROMPT_SELECTOR=llama-3.3-nemotron-super-49b-v1.5/multilingual_voice_assistant
# ----------------------------------------------------------------------------
# OPTION 3: Nemotron-Nano-9B-v2
# ----------------------------------------------------------------------------
# NVIDIA_LLM_IMAGE=nvcr.io/nim/nvidia/nvidia-nemotron-nano-9b-v2:1.12.2
# # Custom LLM endpoint URL (uncomment to override Docker service)
# # Example: https://integrate.api.nvidia.com/v1 (cloud) or http://localhost:8000/v1 (local NIM)
# #NVIDIA_LLM_URL=
# NVIDIA_LLM_MODEL=nvidia/nvidia-nemotron-nano-9b-v2
# TEMPERATURE=0
# TOP_P=1.0
# SYSTEM_PROMPT_SELECTOR=nvidia-nemotron-nano-9b-v2/generic_voice_assistant
# ----------------------------------------------------------------------------
# OPTION 4: Llama-3.1-8b-Instruct
# ----------------------------------------------------------------------------
# Lightweight, non-reasoning Llama model.
# NVIDIA_LLM_IMAGE=nvcr.io/nim/meta/llama-3.1-8b-instruct:1.15.4
# # Custom LLM endpoint URL (uncomment to override Docker service)
# # Example: https://integrate.api.nvidia.com/v1 (cloud) or http://localhost:8000/v1 (local NIM)
# #NVIDIA_LLM_URL=
# NVIDIA_LLM_MODEL=meta/llama-3.1-8b-instruct
# NIM_ENABLE_KV_CACHE_REUSE=1
# SYSTEM_PROMPT_SELECTOR=llama-3.1-8b-instruct/generic_voice_assistant