# jf-sebastian/.env.example — base configuration template

# ============================================================================
# LAYERED CONFIGURATION
# ============================================================================
# Settings are loaded with three layers, highest precedence first:
#
#   1. personalities/{PERSONALITY}/.env   (per-personality overrides)
#   2. jf_sebastian/devices/{OUTPUT_DEVICE_TYPE}/.env  (per-device overrides)
#   3. .env (this file — base configuration)
#
# A typical use case: keep PERSONALITY-agnostic defaults here, put device-
# specific tuning (CONTROL_GAIN for Teddy, VOICE_GAIN for Squawkers) under
# the device bundle, and put personality-specific knobs (VOICE_GAIN for a
# quiet RVC model) under the personality's folder. Each overlay file
# is a normal .env file with the same key=value syntax as this one.
#
# PERSONALITY and OUTPUT_DEVICE_TYPE select which overlays load, so they
# MUST come from this file (or the process environment). Setting either
# inside an overlay file has no effect on overlay selection.
#
# Overlay files are git-ignored by the existing `.env` rule.

# ============================================================================
# PERSONALITY SELECTION
# ============================================================================
# Choose which AI personality to activate. Each personality has:
#   - Unique wake word phrase for activation
#   - Custom voice, speed, and speaking style
#   - Personality-specific system prompt and knowledge base
#   - Pre-recorded filler phrases for low-latency responses
#
# Available personalities:
#   - fred: Mister Rogers, gentle neighbor (Teddy Ruxpin voice)
#   - jarvis: Polished AI butler with a fully-tuned RVC voice
#   - johnny: Laid-back tiki bartender with cocktail expertise
#   - kitt: Knight Rider's talking car
#   - leopold: Eccentric conspiracy theorist with wild backstory
#   - mr_lincoln: Abraham Lincoln, 16th President (Disney animatronic homage)
#   - teddy_ruxpin: The 1985 storytelling bear, punchy and high-energy
#
# To create new personalities, see: personalities/README.md
PERSONALITY=johnny

# ============================================================================
# OPENAI API CONFIGURATION
# ============================================================================
# Your OpenAI API key for Whisper (speech-to-text), GPT (conversation),
# and TTS (text-to-speech) services.
#
# Get your API key from: https://platform.openai.com/api-keys
# Usage costs (approximate per conversation turn):
#   - Whisper: $0.006 per minute of audio
#   - GPT-4o-mini: $0.001-0.005 per turn
#   - TTS: $0.015 per 1000 characters
OPENAI_API_KEY=your_openai_api_key_here

# ============================================================================
# WAKE WORD DETECTION (OpenWakeWord)
# ============================================================================
# Wake word detection runs continuously in the background, listening for
# the personality's activation phrase (e.g., "Hey, Johnny").
#
# This system uses OpenWakeWord - a free, open-source wake word engine that
# runs entirely locally with no API key required. Each personality includes
# a trained wake word model (.onnx file) in its directory.
#
# To train custom wake words: docs/TRAIN_WAKE_WORDS.md

# ============================================================================
# AUDIO DEVICE CONFIGURATION
# ============================================================================
# Audio devices for microphone input and speaker/animatronic output.
#
# To list available devices, run:
#   python -m jf_sebastian.modules.audio_output   (lists all devices)
#   python -m jf_sebastian.modules.audio_input    (lists input devices only)
#
# Device names use case-insensitive partial matching. For example:
#   "MacBook" will match "MacBook Air Microphone"
#   "Arsvita" will match "Arsvita Car Audio Bluetooth"
#
# For Teddy Ruxpin, OUTPUT_DEVICE_NAME should be your Bluetooth cassette
# adapter (e.g., Arsvita). The system outputs stereo audio:
#   LEFT channel  = Voice audio (plays through Teddy's speaker)
#   RIGHT channel = PPM control signals (drives motors for mouth/eyes)
INPUT_DEVICE_NAME=MacBook Air Microphone
OUTPUT_DEVICE_NAME=Arsvita

# Output device type - determines audio processing pipeline
# Options:
#   - teddy_ruxpin: Stereo with PPM control signals (LEFT=voice, RIGHT=PPM)
#   - squawkers_mccaw: Simple stereo audio (LEFT=voice, RIGHT=voice)
#   - headless: Simple stereo audio for computer playback (no hardware)
#
# This setting controls which output device is used for audio generation.
# Each device type has different capabilities:
#   - Teddy Ruxpin includes full animatronic control (mouth, eyes)
#   - Squawkers McCaw plays audio without PPM motor control
#   - Headless plays simple stereo audio for computer playback (no hardware)
OUTPUT_DEVICE_TYPE=teddy_ruxpin

# Note: drop-in device packages (any extra folder under jf_sebastian/devices/)
# own their settings and document them in their own package; see each device's
# README. Per-device tuning goes in the device bundle
# (jf_sebastian/devices/<device>/.env), which is loaded automatically when
# that device is selected.

# ============================================================================
# AUDIO PROCESSING CONFIGURATION
# ============================================================================
# Sample rate for audio capture and processing (Hz)
# Valid values: 16000, 22050, 44100, or 48000
#
# Recommendations:
#   - 16000 Hz: Best for wake word detection (lower CPU usage)
#   - 44100 Hz: CD-quality audio, better for music/high-fidelity
#
# Note: Final output is always resampled to 44100 Hz for precise PPM timing
SAMPLE_RATE=16000

# Audio chunk size for real-time processing (samples)
# Smaller = lower latency but higher CPU usage
# Larger = higher latency but lower CPU usage
#
# Default 1024 provides good balance at 16kHz (64ms chunks)
CHUNK_SIZE=1024

# ============================================================================
# VOICE ACTIVITY DETECTION (VAD)
# ============================================================================
# VAD distinguishes speech from background noise and silence. Uses Silero
# VAD — a small neural-net classifier that's substantially harder to fool
# with sustained ambient noise (USB speaker hum, fans) than rule-based
# alternatives.

# Per-window speech probability cutoff (0.0-1.0)
# Each 32 ms window of audio is scored by Silero; if the score is at or
# above this value, the window is classified as speech.
#
#   0.3 = Lenient (more permissive, may flag faint sounds as speech)
#   0.5 = Balanced (default; works for most environments)
#   0.7 = Strict (only confident speech, may miss quiet/distant voice)
#
# Tuning guide:
# - Noise still leaking through? Raise toward 0.6-0.7.
# - Real speech being rejected? Lower toward 0.3-0.4.
VAD_THRESHOLD=0.5

# Maximum silence duration before returning to IDLE state (seconds)
# If you stop talking for this long, the system assumes you're done
# and returns to listening for the wake word.
#
# Increase if you need long pauses while speaking.
# Decrease for faster timeout in noisy environments.
SILENCE_TIMEOUT=5.0

# Silence duration required to end speech detection (seconds)
# After you start talking, this much continuous silence indicates
# you've finished your sentence.
#
# Too low = May cut off your sentence if you pause briefly
# Too high = Adds delay before processing starts
#
# 1.0s is a natural pause length that works well for most speakers
SPEECH_END_SILENCE_SECONDS=1.0

# Minimum time to listen after wake word detection (seconds)
# Prevents the system from ending speech detection too quickly
# if there's a brief pause right after the wake word.
#
# Example: "Hey Johnny... [pause] ... what's in a Mai Tai?"
# This setting ensures the pause doesn't prematurely end listening.
MIN_LISTEN_SECONDS=1.0

# Minimum RMS amplitude to send audio to Whisper for transcription
# First-stage filter that catches pure silence and very quiet audio.
# This saves API quota and prevents false transcriptions.
#
# Typical RMS values:
# - Normal speech: ~1500-5000
# - Quiet speech: ~800-1500
# - Background noise: ~100-500
# - Near silence: <100
#
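# Tuning guide:
# - Too many quiet speech rejections? Lower to 500-700
# - Still transcribing silence? Raise to 1000-1500
# - Noisy environment? Raise to 1200-2000
# - Quiet environment? Lower to 600-800
#
# NOTE(review): the value shipped below (60) is lower than every range
# suggested in this guide and falls in the "near silence" band of the table
# above, so the guide and the shipped value appear to use different scales.
# Confirm which one is current before tuning.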
MIN_AUDIO_RMS=60

# Minimum speech content ratio for VAD-based speech detection (0.0-1.0)
# Second-stage filter using Voice Activity Detection to analyze audio content.
# Calculates what percentage of audio frames contain actual speech vs. noise.
# Audio must contain at least this ratio of speech to proceed to Whisper.
#
# This prevents Whisper hallucinations ("Thank you", "Goodbye", etc.) that
# occur when transcribing background noise, music, or rustling sounds.
#
# Default 0.3 means 30% of audio frames must contain speech
# Lower values (0.2) = More permissive, may allow some noise through
# Higher values (0.4-0.5) = More strict, may reject brief utterances
#
# Tuning guide:
# - Getting "No speech detected" on valid speech? Lower to 0.2-0.25
# - Still getting "Thank you"/"Goodbye" hallucinations? Raise to 0.4-0.5
# - Brief commands being rejected? Lower to 0.2
MIN_SPEECH_RATIO=0.3

# ============================================================================
# WEATHER & LOCATION CONTEXT
# ============================================================================
# Weather is injected into LLM context so personalities can reference it
# naturally ("Nice day out there, isn't it?"). Cached 30 minutes.
#
# Upgrading from a previous version with only ZIPCODE set? Do nothing —
# auto-selection picks wttr automatically.
#
# WEATHER_PROVIDER values:
#   wttr           — wttr.in (free, no API key, US zipcode)
#   homeassistant  — your local Home Assistant weather entity
#   manual         — free-form text you provide (offline / testing)
#   none           — disable weather context entirely
#   auto / unset   — pick the first provider whose vars are configured
#                    (priority: homeassistant > wttr > manual)
#WEATHER_PROVIDER=wttr

# Provider: wttr.in (default; free, no API key, US zipcode required)
#ZIPCODE=90210

# Provider: Home Assistant
# - Long-lived access token: HA UI → Profile → Security → Long-Lived Access Tokens
# - Weather entity: HA UI → Developer Tools → States, filter "weather."
# - URL must include the port and be reachable from this machine.
#HOME_ASSISTANT_URL=http://homeassistant.local:8123
#HOME_ASSISTANT_TOKEN=your_long_lived_access_token_here
#HOME_ASSISTANT_WEATHER_ENTITY=weather.home

# Provider: Manual — free-form text passed verbatim into the LLM prompt as
# the weather description. No parsing; write whatever the personality should
# know (e.g., "Sunny, 72F, light breeze in Brooklyn"). Makes no network calls.
#MANUAL_WEATHER=Sunny and 72F

# ============================================================================
# NEWS HEADLINES
# ============================================================================
# Top headlines are injected into LLM context so personalities can naturally
# bring up "what's going on" in conversation. Cached 30 min by default.
#
# By default, news is ON with NPR Topics: News as the source. To turn it off,
# set NEWS_PROVIDER=none. To use a different feed, set NEWS_RSS_URL.
#
# Pick one provider, or leave NEWS_PROVIDER unset and auto-selection picks
# the first one configured (priority: rss > manual). Hacker News is excluded
# from auto-select since it's tech-focused — set NEWS_PROVIDER=hackernews to
# opt in.
# Valid values: rss / hackernews / manual / none / auto
#NEWS_PROVIDER=rss

# Provider: RSS (default; any RSS or Atom feed)
# Examples:
#   NPR Topics: News    https://feeds.npr.org/1001/rss.xml   (default if unset)
#   BBC News (World)    http://feeds.bbci.co.uk/news/world/rss.xml
#   AP Top News         https://feeds.apnews.com/rss/apf-topnews
#   Reuters World       https://feeds.reuters.com/reuters/worldNews
#NEWS_RSS_URL=https://feeds.npr.org/1001/rss.xml

# Provider: Hacker News (free, no API key, tech-focused)
# Set NEWS_PROVIDER=hackernews — no other config required.

# Provider: Manual (newline-separated headlines; offline / testing — no network)
# Use literal \n between headlines:
#MANUAL_NEWS=Headline one\nHeadline two\nHeadline three

# Tuning (defaults usually fine)
# Max headlines injected per conversation:
#NEWS_HEADLINE_LIMIT=5
# How long to reuse fetched headlines (minutes):
#NEWS_CACHE_TTL_MINUTES=30

# ============================================================================
# SPOTIFY PLAYBACK CONTROL (Optional)
# ============================================================================
# Lets a personality control music by voice ("play Quiet Village on Spotify",
# "play tiki music in the kitchen", "skip", "turn it up"). Commands an existing
# Spotify Connect speaker (an Echo, a phone, a Sonos, the desktop app) over the
# Web API; J.F. Sebastian does not produce the music audio itself.
#
# Requirements:
#   - Spotify Premium (the playback API is Premium-only)
#   - A free Spotify app (Client ID) from https://developer.spotify.com/dashboard
#     with the redirect URI below registered under the app's Settings
#   - A one-time browser login: `python scripts/spotify_auth.py`
#     (it caches a refresh token and prints your exact speaker names)
#
# Once this master switch is on, every personality gets the music tools by
# default; set `spotify_enabled: false` in a personality.yaml to exclude that
# character.
#
# Full walkthrough (including headless/Jetson token copy): docs/SPOTIFY_SETUP.md
#
# Master switch. Leave false to keep the music tools out of every personality.
SPOTIFY_ENABLED=false

# Client ID of your Spotify app (no client secret; auth uses PKCE).
#SPOTIFY_CLIENT_ID=your_spotify_client_id_here

# Redirect URI. Must match exactly what you register in the Spotify app's
# Settings. The default loopback address works for the local browser login.
#SPOTIFY_REDIRECT_URI=http://127.0.0.1:8888/callback

# Where the cached refresh token is written (chmod 0600). Keep it outside any
# synced personality/device bundle. Default: ~/.config/jf-sebastian/spotify-token.json
#SPOTIFY_TOKEN_CACHE=~/.config/jf-sebastian/spotify-token.json

# Speaker used when a command doesn't name one ("play some jazz"). Use an exact
# device name from `python scripts/spotify_auth.py`. If unset, the currently
# active Spotify device is used.
#SPOTIFY_DEFAULT_DEVICE=Living Room

# Friendly spoken aliases mapped to exact device names, comma-separated
# (alias=Exact Device Name). Lets people say "the den" for a device actually
# named "Living Room".
#SPOTIFY_DEVICE_ALIASES=kitchen=Kitchen Echo,den=Living Room

# Inject the currently-playing track into the conversation context each turn so
# the personality can answer questions about it ("what's this song?"). Fetched
# live with a short cache so it stays current; the quick lookup is masked by the
# filler audio. Default true; set false to keep playback control without the
# always-on now-playing context.
#SPOTIFY_NOW_PLAYING_CONTEXT=true

# ============================================================================
# PROACTIVE SCHEDULER
# ============================================================================
# When enabled, personalities can define scheduled events in their own
# personalities/<name>/scheduled_events.yaml — proactive greetings, bedtime
# stories, holiday surprises, etc. Events only fire when the device is IDLE
# (never interrupts an in-progress conversation).
#
# Set SCHEDULER_ENABLED=false to globally disable proactive speech.
SCHEDULER_ENABLED=true

# Optional global quiet hours (HH:MM, 24-hour). When set, these override each
# personality's quiet_hours block. The window can wrap midnight
# (e.g., 22:00 → 07:00).
#QUIET_HOURS_START=22:00
#QUIET_HOURS_END=07:00

# ============================================================================
# CONVERSATION SETTINGS
# ============================================================================
# Timeout to clear conversation history (seconds)
# If no interaction occurs for this duration, the conversation context
# is cleared and the system starts fresh.
#
# This prevents the AI from remembering stale context from old conversations
# while allowing natural back-and-forth within a session.
CONVERSATION_TIMEOUT=120.0

# Maximum conversation history to maintain (message count)
# Limits how many previous messages are sent to GPT for context.
#
# Higher = More context, better conversation coherence, higher API costs
# Lower = Less context, may forget recent topics, lower API costs
#
# Each message pair (user + assistant) counts as 2 messages.
# 20 messages = ~10 conversation turns of context.
MAX_HISTORY_LENGTH=20

# Minimum chunk length for streaming responses (word count)
# When streaming LLM responses, text is chunked by complete sentences.
# Each chunk will contain the minimum number of sentences that exceeds
# this word count threshold, enabling efficient TTS processing.
#
# Lower = More frequent, smaller chunks (faster initial response)
# Higher = Fewer, larger chunks (more efficient TTS batching)
#
# Default 15 words provides good balance for natural-sounding speech
# while maintaining low latency for the first chunk.
MIN_CHUNK_WORDS=15

# Maximum tokens for GPT response generation
# Controls the maximum length of AI responses.
#
# IMPORTANT - Token limits work differently for GPT-4 vs the GPT-5 family:
#
# GPT-4 models (gpt-4o-mini, gpt-4o, gpt-3.5-turbo):
#   - Use these values directly as configured
#   - 200-300 tokens is sufficient for most responses
#
# GPT-5 family (gpt-5.4-mini default, gpt-5, etc.):
#   - The configured value is auto-scaled up (minimum ~3500) because GPT-5
#     accounts for tokens differently. This happens in code, no action needed.
#   - Example: 200 configured -> 3600 used, 300 -> 5400 used.
#   - It is only a ceiling; short conversational replies use far fewer tokens,
#     so a high ceiling does not by itself slow responses or cost more.
#
# Higher = Longer, more detailed responses, higher API costs
# Lower = Shorter, more concise responses, lower API costs
MAX_TOKENS=300

# Maximum tokens for streaming GPT responses
# Typically lower than MAX_TOKENS since streaming enables earlier playback.
#
# Same auto-scaling rules apply:
#   - GPT-4: Uses value as configured (200 tokens)
#   - GPT-5 family: Auto-scaled up, never below the ~3500 minimum (200 -> 3600)
#
# Default 200 provides good balance between response length and latency.
MAX_TOKENS_STREAMING=200

# Enable filler audio playback
# When true, plays pre-recorded filler phrases while LLM generates response.
# When false, waits silently for the full response (higher perceived latency).
#
# Filler audio provides natural conversational flow by filling the gap between
# your question and the AI's response, but some users may prefer to wait in
# silence instead of hearing the filler phrases.
#
# true = Play filler audio (lower perceived latency, more natural)
# false = Wait silently (higher perceived latency, no filler phrases)
ENABLE_FILLER_AUDIO=true

# ============================================================================
# OPENAI MODEL CONFIGURATION
# ============================================================================
# OpenAI API models for speech recognition, conversation, and synthesis

# Whisper model for speech-to-text transcription
# Currently only "whisper-1" is available via API
# Extremely accurate for most accents and audio conditions
WHISPER_MODEL=whisper-1

# GPT model for generating conversational responses
# Options:
#   - gpt-5.4-mini: Newest small model; fast first token and strong quality (RECOMMENDED, default)
#   - gpt-4o-mini: Fast, very cheap, great quality; the proven fallback
#   - gpt-4o: More capable but roughly 10x the cost of gpt-4o-mini
#   - gpt-3.5-turbo: Cheaper but lower quality responses
#
# Latency note: benchmarked in this pipeline, gpt-5.4-mini's time-to-first-token
# was at or below gpt-4o-mini (sub-second), so it suits real-time conversation.
# Do NOT confuse it with the original gpt-5-mini (Aug 2025), which was slow
# (~15-20s); gpt-5.4-mini is a different, much faster model.
#
# Fall back to GPT_MODEL=gpt-4o-mini if your account lacks gpt-5.4-mini access
# or you want the lowest cost (verify current per-token pricing for your model).
#
# GPT-5 family differences from GPT-4 (handled automatically in code):
#   - Uses max_completion_tokens instead of max_tokens
#   - Only supports the default temperature (1.0), not custom values
#   - Needs higher token limits, so MAX_TOKENS is auto-scaled up (see
#     MAX_TOKENS under CONVERSATION SETTINGS above)
GPT_MODEL=gpt-5.4-mini

# Reasoning effort for the GPT-5 family (low / medium / high).
# Lower = faster first token (best for real-time conversation); higher = more
# deliberate but slower. 'low' is the default and keeps replies snappy.
# Note: gpt-5.4-mini does not accept 'minimal'. Leave this empty to omit the
# parameter and use the model's own default. Ignored for GPT-4 models, which
# have no reasoning parameter.
GPT_REASONING_EFFORT=low

# Text-to-speech model for voice synthesis
# Options:
#   - gpt-4o-mini-tts: New model with tone/style control (recommended)
#   - tts-1: Fast, lower quality, no tone control
#   - tts-1-hd: Slower, higher quality, no tone control
#
# Note: Voice, speed, and style are defined per-personality in the
# personality.yaml file, not here. The gpt-4o-mini-tts model supports
# natural language instructions for accent, tone, emotion, etc.
TTS_MODEL=gpt-4o-mini-tts

# ============================================================================
# ANIMATRONIC CONTROL SETTINGS
# ============================================================================
# Configuration for Teddy Ruxpin motor control via PPM signals
#
# The system generates stereo audio where:
#   LEFT channel  = Voice audio (what Teddy says)
#   RIGHT channel = PPM control track (pulse position modulation signals)
#
# PPM signals encode 8 channels of motor control at 60Hz frame rate:
#   Channel 1: Mouth position (syllable-based lip sync)
#   Channel 2: Eye position (sentiment-based expressions)
#   Channels 3-8: Reserved for future use (blinks, head movement, etc.)

# Audio playback preroll (milliseconds)
# Adds a small delay before audio playback starts to prevent clipping
# the first syllable. Compensates for audio buffer initialization time.
#
# Too low = First word may be cut off
# Too high = Noticeable delay before speech starts
#
# 240ms works well for most Bluetooth audio devices
PLAYBACK_PREROLL_MS=240

# Wait this long after playback finishes before re-opening the microphone
# for the next conversation turn (milliseconds). Covers speaker buffer
# drain and acoustic decay; without it, residual bot audio leaks into the
# mic and VAD treats it as the user's next utterance.
#
# Increase if the bot still triggers on its own tail audio.
# Decrease for snappier turn-taking once tail leakage is acceptable.
PLAYBACK_TAIL_GUARD_MS=500

# Voice audio volume multiplier (0.0 to 2.0)
# Controls the loudness of the spoken voice in the LEFT channel.
#
# 1.0 = Normal volume (no change)
# >1.0 = Louder (boost volume)
# <1.0 = Quieter (reduce volume)
#
# Default 1.05 provides a slight boost for clearer playback through
# Teddy's small speaker while avoiding distortion.
VOICE_GAIN=1.05

# Control track volume multiplier (0.0 to 1.0)
# Controls the amplitude of PPM signals in the RIGHT channel.
#
# Too high = Motors may move too aggressively or cause mechanical stress
# Too low = Motors may not respond or move weakly
#
# Default 0.52 provides smooth motor movement while minimizing audio
# bleed-through from the control track into the voice channel.
CONTROL_GAIN=0.52

# Sentiment analysis thresholds for eye expressions
# The system analyzes the emotional tone of responses using VADER sentiment
# analysis and adjusts eye position accordingly:
#
#   Positive sentiment (happy/excited) = Eyes wide open
#   Neutral sentiment = Normal eye position
#   Negative sentiment (sad/concerned) = Eyes partially closed
#
# Threshold values range from -1.0 (very negative) to +1.0 (very positive)

# Positive sentiment threshold
# Response sentiment above this value triggers wide-open eyes
# Lower = Eyes open more frequently (more enthusiastic)
# Higher = Eyes only open for very positive responses
SENTIMENT_POSITIVE_THRESHOLD=0.3

# Negative sentiment threshold
# Response sentiment below this value triggers partially-closed eyes
# Higher (closer to 0) = Eyes close more frequently
# Lower (more negative) = Eyes only close for very sad responses
SENTIMENT_NEGATIVE_THRESHOLD=-0.3

# ============================================================================
# WAKE WORD DETECTION THRESHOLD
# ============================================================================
# Detection confidence threshold (0.0 to 1.0)
# Controls how confident the system must be before triggering on wake word.
#
# Higher (0.95-0.99) = Fewer false positives, may miss some valid wake words
# Lower (0.80-0.90) = More sensitive, but may trigger on similar sounds
#
# Default 0.99 is very strict and works well in most environments.
# Reduce to 0.95 if you find yourself having to repeat the wake word often.
# Increase to 0.995 in very noisy environments with frequent false triggers.
WAKE_WORD_THRESHOLD=0.99

# ============================================================================
# RVC VOICE CONVERSION (Optional)
# ============================================================================
# RVC (Retrieval-based Voice Conversion) allows you to apply custom trained
# voice models to transform the TTS output, enabling truly unique character
# voices that go beyond what OpenAI TTS can provide.
#
# RVC is configured per-personality in the personality.yaml file. This global
# setting provides an override to disable RVC entirely if needed.
#
# To install RVC dependencies:
#   ./scripts/install_rvc.sh
# Or manually:
#   pip install pip==24.0 && pip install -r requirements-rvc.txt
#
# To use RVC for a personality:
# 1. Install RVC dependencies (see above)
# 2. Obtain or train an RVC model (.pth file) for the desired voice
# 3. Place the model in the personality directory or global rvc_models/ folder
# 4. Add RVC configuration to the personality's personality.yaml file
#
# See docs/CREATING_PERSONALITIES.md for detailed RVC setup instructions.

# Global RVC enable/disable override
# Set to false to disable RVC for all personalities (useful for debugging).
# When true, each personality still enables/disables RVC via personality.yaml.
RVC_ENABLED=true

# RVC inference device (auto/cpu/mps/cuda)
# - auto: Automatically detect best available device (CUDA > MPS > CPU)
# - cpu: Works everywhere, slower
# - mps: Apple Silicon GPU (M1/M2/M3) - 3-5x faster than CPU
# - cuda: NVIDIA GPU (requires NVIDIA hardware, e.g. Jetson, desktop GPU)
RVC_DEVICE=auto

# Global RVC model directory
# RVC models can be placed here and shared across personalities
# Models in personality directories take precedence over global models
# RVC_MODEL_DIR=./rvc_models/

# ============================================================================
# DEBUG SETTINGS
# ============================================================================
# Development and troubleshooting configuration

# Enable debug mode for detailed logging
# When true, enables verbose logging throughout the application for
# troubleshooting issues.
DEBUG_MODE=false

# Logging level
# Controls the verbosity of log messages written to console and jf_sebastian.log
#
# Options (from most to least verbose):
#   - DEBUG: Everything including internal state changes
#   - INFO: General operational messages (recommended for normal use)
#   - WARNING: Only warnings and errors
#   - ERROR: Only error messages
LOG_LEVEL=INFO

# Save audio files for debugging
# When true, saves input and output audio files to DEBUG_AUDIO_PATH
# for inspection in audio editors like Audacity.
#
# Input files: Captured microphone audio (your voice)
# Output files: Stereo audio for your device (format depends on OUTPUT_DEVICE_TYPE)
#
# Useful for diagnosing:
#   - Microphone pickup issues
#   - VAD sensitivity problems
#   - PPM signal generation (view control track waveform)
#   - Audio quality and volume levels
SAVE_DEBUG_AUDIO=false

# Directory for saved debug audio files
# Files are named with timestamps: input_YYYYMMDD_HHMMSS.wav
DEBUG_AUDIO_PATH=./debug_audio/

# ============================================================================
# SUPERVISOR / WATCHDOG (scripts/supervisor.py)
# ============================================================================
# When you run J.F. Sebastian under scripts/supervisor.py (recommended for
# unattended deployments), the supervisor restarts the process on crash and
# kills it if it stops responding. The supervisor reads the heartbeat file
# the main process writes to detect hangs.
#
# To opt in to liveness reporting, set HEARTBEAT_FILE. With it unset, no
# heartbeat thread runs and the watchdog can only detect crashes (not hangs).
#
# Apart from HEARTBEAT_FILE (unset = no heartbeat), all supervisor settings
# have sensible defaults; uncomment to override.
#HEARTBEAT_FILE=/tmp/jf_sebastian.heartbeat
#HEARTBEAT_INTERVAL=10.0
#WATCHDOG_TIMEOUT=60.0
#RESTART_BACKOFF_INITIAL=1.0
#RESTART_BACKOFF_MAX=60.0
#CRASH_REPORT_DIR=./crash_reports/
#CRASH_REPORT_TAIL=100