Skip to content

Commit 997d329

Browse files
authored
feat(tts-native): platform-native TTS backends for macOS, Linux, Windows (#487)
Adds the two backend scripts that the TTS pipeline (PR #442) was already trying to invoke: `scripts/tts-native.sh` for macOS `say`, Linux piper / espeak-ng, and Git-Bash-on-Windows; `scripts/tts-native.ps1` for SAPI5 on native Windows. After this lands, `peon tts on` actually speaks.

Implements the ADR-001 backend contract:
- Speech text comes in on stdin (sidesteps shell quoting on user-controlled template output)
- Numeric rate/volume go through `awk -v` so a hostile config value can never inject awk source
- Synthesis errors, missing engines, unsupported platforms all exit 0 (TTS never blocks the IDE hook)

Linux dispatcher prefers piper-with-model over espeak-ng; piper sample rate is read from the .onnx.json sidecar (default 22050). MSYS2/MINGW bridges to `tts-native.ps1` via `powershell.exe` rather than re-implementing SAPI5 in bash.

Rides along: `install.ps1` shim generation now probes for `pwsh` first and falls back to `powershell.exe`, fixing `peon …` invocations on dev boxes where PSModulePath has PS 7 paths ahead of PS 5.1 inbox paths and `Get-ExecutionPolicy` blows up loading the wrong Microsoft.PowerShell.Security.

Test harness improvements bundled in:
- `tests/setup.bash` resolves `$PEON_PY` via `command -v` instead of hardcoding `/usr/bin/python3` (four call sites)
- `tests/peon-packs.Tests.ps1` canonicalizes `$env:TEMP` through Push-Location to expand 8.3 short names on GitHub Windows runners
- `Set-Location -LiteralPath` so paths with bracket characters don't break

Test coverage: 42 BATS scenarios (`tests/tts-native.bats`) and 40 Pester scenarios (`tests/tts-native.Tests.ps1`) covering platform dispatch, engine priority, unit conversions and clamping, voice resolution, stdin pipeline binding (in-process and -File invocation paths), and error containment. Both rely on `PEON_TTS_DRY_RUN` / `PEON_TTS_TRACE_FILE` plus PATH-shadowed engine stubs rather than a live SAPI5 / espeak-ng.

Closes the gap left after PR #442.
Thanks @muunkky for the thorough writeup and the pwsh-fallback fix.
1 parent f823405 commit 997d329

9 files changed

Lines changed: 1767 additions & 10 deletions

install.ps1

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,22 @@ if (Test-Path $hookHandleUsePs1Source) {
347347
}
348348
}
349349

350+
# --- Install tts-native.ps1 (Windows SAPI5 TTS backend) ---
# The backend is taken from the local checkout when the installer runs from a
# repo clone; otherwise (one-liner install) it is fetched from $RepoBase.
# A failed download only warns -- it does not abort the install.
$ttsNativeSource = Join-Path $ScriptDir "scripts\tts-native.ps1"
$ttsNativeTarget = Join-Path $scriptsDir "tts-native.ps1"

if (-not (Test-Path $ttsNativeSource)) {
    # One-liner install: download from GitHub
    $ttsNativeUrl = "$RepoBase/scripts/tts-native.ps1"
    try {
        Invoke-WebRequest -Uri $ttsNativeUrl -OutFile $ttsNativeTarget -UseBasicParsing -ErrorAction Stop
    } catch {
        Write-Host " Warning: Could not download tts-native.ps1" -ForegroundColor Yellow
    }
} else {
    # Local install: copy from repo
    Copy-Item -Path $ttsNativeSource -Destination $ttsNativeTarget -Force
}
365+
350366
# --- Install the main hook script (PowerShell) ---
351367
$hookScript = @'
352368
# peon-ping hook for Claude Code (Windows native)
@@ -3112,9 +3128,19 @@ foreach ($adapterFile in $adapterFiles) {
31123128
Write-Host " Installed $($adapterFiles.Count) adapter scripts to $adaptersDir"
31133129

31143130
# --- Install CLI shortcut ---
3131+
# Prefer pwsh (PowerShell 7+) when available, fall back to Windows PowerShell 5.1.
3132+
# pwsh has its own clean module path; powershell.exe can fail when PSModulePath
3133+
# leaks PS 7 module dirs in front of the 5.1 inbox modules (seen on dev
3134+
# environments where CloudSDK or similar polluted PSModulePath) — symptom is
3135+
# "Microsoft.PowerShell.Security module could not be loaded" on Get-ExecutionPolicy.
31153136
$peonCli = @"
31163137
@echo off
3117-
powershell -NoProfile -NonInteractive -Command "& '%USERPROFILE%\.claude\hooks\peon-ping\peon.ps1' %*"
3138+
where pwsh >nul 2>&1
3139+
if %ERRORLEVEL% equ 0 (
3140+
pwsh -NoProfile -NonInteractive -Command "& '%USERPROFILE%\.claude\hooks\peon-ping\peon.ps1' %*"
3141+
) else (
3142+
powershell -NoProfile -NonInteractive -Command "& '%USERPROFILE%\.claude\hooks\peon-ping\peon.ps1' %*"
3143+
)
31183144
"@
31193145
$cliBinDir = Join-Path $env:USERPROFILE ".local\bin"
31203146
if (-not (Test-Path $cliBinDir)) {
@@ -3125,13 +3151,19 @@ $cliBatPath = Join-Path $cliBinDir "peon.cmd"
31253151
$utf8NoBom = New-Object System.Text.UTF8Encoding $false
31263152
[System.IO.File]::WriteAllLines($cliBatPath, $peonCli.Split("`n"), $utf8NoBom)
31273153

3128-
# Also create a bash-compatible script for Git Bash / WSL
3129-
# Use the actual Windows path (resolved at install time) to avoid path translation issues
3154+
# Also create a bash-compatible script for Git Bash / WSL.
3155+
# Use the actual Windows path (resolved at install time) to avoid path translation issues.
3156+
# Same pwsh-then-powershell preference as peon.cmd above.
31303157
$peonPs1Path = Join-Path $InstallDir "peon.ps1"
31313158
$peonShScript = @"
31323159
#!/usr/bin/env bash
31333160
# peon-ping CLI wrapper for Git Bash / WSL / Unix shells on Windows
3134-
powershell.exe -NoProfile -NonInteractive -Command "& '$peonPs1Path' `$*"
3161+
if command -v pwsh >/dev/null 2>&1; then
3162+
PS_EXE=pwsh
3163+
else
3164+
PS_EXE=powershell.exe
3165+
fi
3166+
"`$PS_EXE" -NoProfile -NonInteractive -Command "& '$peonPs1Path' `$*"
31353167
"@
31363168
$peonShPath = Join-Path $cliBinDir "peon"
31373169
[System.IO.File]::WriteAllLines($peonShPath, $peonShScript.Split("`n"), $utf8NoBom)

install.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,7 @@ else
591591
curl -fsSL "$REPO_BASE/scripts/pack-download.sh" -o "$INSTALL_DIR/scripts/pack-download.sh" 2>/dev/null || true
592592
curl -fsSL "$REPO_BASE/scripts/mac-overlay.js" -o "$INSTALL_DIR/scripts/mac-overlay.js" 2>/dev/null || true
593593
curl -fsSL "$REPO_BASE/scripts/notify.sh" -o "$INSTALL_DIR/scripts/notify.sh" 2>/dev/null || true
594+
curl -fsSL "$REPO_BASE/scripts/tts-native.sh" -o "$INSTALL_DIR/scripts/tts-native.sh" 2>/dev/null || true
594595
mkdir -p "$INSTALL_DIR/docs"
595596
curl -fsSL "$REPO_BASE/docs/peon-icon.png" -o "$INSTALL_DIR/docs/peon-icon.png" 2>/dev/null || true
596597
if [ "$UPDATING" = false ]; then
@@ -674,6 +675,7 @@ chmod +x "$INSTALL_DIR/scripts/hook-handle-use.sh" 2>/dev/null || true
674675
chmod +x "$INSTALL_DIR/scripts/hook-handle-rename.sh" 2>/dev/null || true
675676
chmod +x "$INSTALL_DIR/scripts/pack-download.sh" 2>/dev/null || true
676677
chmod +x "$INSTALL_DIR/scripts/notify.sh" 2>/dev/null || true
678+
chmod +x "$INSTALL_DIR/scripts/tts-native.sh" 2>/dev/null || true
677679

678680
# --- Build peon-play (macOS Sound Effects device support) ---
679681
if [ "$PLATFORM" = "mac" ] && command -v swiftc &>/dev/null; then

scripts/tts-native.ps1

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
<#
2+
.SYNOPSIS
3+
Windows native TTS backend for peon-ping. Speaks stdin text via SAPI5
4+
(System.Speech.Synthesis). Fire-and-forget: exit code is always 0, all
5+
errors are contained, and no output is produced during normal hook
6+
invocations. Debug diagnostics are routed to stderr and gated on
7+
PEON_DEBUG=1.
8+
9+
.DESCRIPTION
10+
Invoked from peon.ps1's Invoke-TtsSpeak helper with decoded plain-text
11+
piped on stdin and voice/rate/volume passed as named parameters. Base64
12+
encoding lives in Invoke-TtsSpeak (it guards the Start-Process -Command
13+
boundary) -- bytes arriving here are already UTF-8 text.
14+
15+
Rate and volume are normalized at the integration layer to platform-
16+
independent floats (rate: 0.0-2.0 with 1.0 = normal; volume: 0.0-1.0)
17+
and mapped internally to SAPI5's native units (rate int -10..+10,
18+
volume int 0..100) with clamping.
19+
20+
Under PEON_TTS_DRY_RUN=1 the script writes the resolved synthesis
21+
parameters as JSON to PEON_TTS_TRACE_FILE and skips the Speak call.
22+
This test hook lets Pester verify behaviour without driving real SAPI.
23+
24+
.PARAMETER InputText
25+
Pipeline input. Each object piped in becomes a line of the buffer;
26+
trailing whitespace is trimmed before synthesis. Empty or whitespace-
27+
only input exits 0 without calling Speak.
28+
29+
.PARAMETER Voice
30+
SAPI5 voice name (exact match against GetInstalledVoices output) or
31+
the sentinel string "default" to use the engine default. A requested
32+
voice that is not installed falls through to the default with a debug
33+
line when PEON_DEBUG=1. Default: "default".
34+
35+
.PARAMETER Rate
36+
Float, 0.0-2.0. 1.0 is normal speed, 0.5 is half, 2.0 is double.
37+
Mapped to SAPI int via [math]::Round((Rate-1.0)*10) and clamped to
38+
-10..+10. Default: 1.0.
39+
40+
.PARAMETER Vol
41+
Float, 0.0-1.0. 0.0 is silent, 1.0 is full volume. Mapped to SAPI int
42+
via [math]::Round(Vol*100) and clamped to 0..100. Default: 0.5.
43+
44+
.PARAMETER ListVoices
45+
If set, prints installed SAPI voice names to stdout (one per line)
46+
and exits 0 without reading stdin or calling Speak.
47+
48+
.EXAMPLE
49+
"hello world" | .\tts-native.ps1 -Voice "Microsoft David" -Rate 1.0 -Vol 0.5
50+
51+
.EXAMPLE
52+
.\tts-native.ps1 -ListVoices
53+
#>
54+
# Script parameters. Speech text arrives through the pipeline; everything else
# is a named option. Rate and Vol are platform-independent floats that the end
# block converts to SAPI5 integer units with clamping.
param(
55+
# Pipeline-bound text; each piped object is appended to the buffer as a line.
[Parameter(ValueFromPipeline = $true)]
56+
[string]$InputText,
57+
# Installed voice name, or the sentinel "default" to skip voice selection.
[string]$Voice = "default",
58+
# Speed multiplier where 1.0 is normal (mapped to SAPI rate -10..+10).
[double]$Rate = 1.0,
59+
# Volume fraction where 1.0 is full volume (mapped to SAPI volume 0..100).
[double]$Vol = 0.5,
60+
# When set, print installed voice names and exit without synthesizing.
[switch]$ListVoices
61+
)
62+
63+
begin {
64+
# Environment-driven switches, read once and shared through script scope:
#   PEON_TTS_TRACE_FILE  - where the dry-run JSON trace is written
#   PEON_TTS_DRY_RUN=1   - record the resolved parameters instead of speaking
#   PEON_DEBUG=1         - enable stderr diagnostics
$script:TracePath = $env:PEON_TTS_TRACE_FILE
$script:DryRun    = "1" -eq $env:PEON_TTS_DRY_RUN
$script:PeonDebug = "1" -eq $env:PEON_DEBUG
67+
68+
# Write one diagnostic line to stderr, prefixed with "[tts-native] ".
# Does nothing unless debug mode ($script:PeonDebug) is on, so normal hook
# invocations stay silent.
function Write-DebugLine {
    param([string]$Message)

    if (-not $script:PeonDebug) {
        return
    }

    $line = "[tts-native] $Message"
    [Console]::Error.WriteLine($line)
}
74+
75+
# Persist the resolved synthesis parameters as compact JSON for the test
# harness. Only active in dry-run mode with a trace path configured; any
# failure is reported through Write-DebugLine and never propagates.
#
# Parameters:
#   Fields - hashtable of values to serialize (overwrites the trace file).
function Write-Trace {
    param([hashtable]$Fields)
    if (-not $script:DryRun) { return }
    if (-not $script:TracePath) { return }
    try {
        $json = $Fields | ConvertTo-Json -Depth 4 -Compress
        # -LiteralPath: the trace path is a plain file name, so characters such
        # as [ and ] must not be interpreted as wildcards.
        # -ErrorAction Stop: Set-Content reports I/O problems (e.g. a missing
        # directory) as non-terminating errors, which would bypass the catch
        # below and leak onto the error stream.
        Set-Content -LiteralPath $script:TracePath -Value $json -Encoding UTF8 -ErrorAction Stop
    } catch {
        Write-DebugLine "trace write failed: $_"
    }
}
86+
87+
# Attempt to load the System.Speech assembly. A failure (PowerShell 7 Core on
# non-Windows, missing assembly, etc.) is not fatal: the outcome is recorded in
# $script:SpeechLoaded and later stages turn into no-ops. Dry-run mode stubs
# voice enumeration and synthesis, so tests still work on runners that have no
# real SAPI stack.
$speechAvailable = $true
try {
    Add-Type -AssemblyName System.Speech -ErrorAction Stop
} catch {
    $speechAvailable = $false
    Write-DebugLine "failed to load System.Speech: $_"
}
$script:SpeechLoaded = $speechAvailable
98+
99+
# --- -ListVoices short-circuit: runs in begin, exits before process/end ---
# Prints one installed voice name per line to stdout, then exits 0. When
# System.Speech is unavailable or enumeration fails, nothing is printed and the
# exit code is still 0.
if ($ListVoices) {
    if ($script:SpeechLoaded) {
        $enumSynth = $null
        try {
            $enumSynth = [System.Speech.Synthesis.SpeechSynthesizer]::new()
            $enumSynth.GetInstalledVoices() | ForEach-Object {
                [Console]::Out.WriteLine($_.VoiceInfo.Name)
            }
        } catch {
            Write-DebugLine "voice enumeration failed: $_"
        } finally {
            # Release the synthesizer even when enumeration throws.
            if ($null -ne $enumSynth) {
                try {
                    $enumSynth.Dispose()
                } catch {
                    Write-DebugLine "voice enumeration failed: $_"
                }
            }
        }
    }
    exit 0
}

# Accumulates pipeline input across process-block invocations.
$script:Buffer = New-Object System.Text.StringBuilder
116+
}
117+
118+
process {
    # Invoked once per pipeline object. Each non-empty value is appended to the
    # shared buffer as its own line; null or empty values are dropped so they
    # cannot introduce blank lines.
    if (-not [string]::IsNullOrEmpty($InputText)) {
        $null = $script:Buffer.AppendLine($InputText)
    }
}
125+
126+
end {
127+
# Text gathered from the pipeline, with trailing whitespace removed.
$text = $script:Buffer.ToString().TrimEnd()

# Fallback for `powershell.exe -File tts-native.ps1`: in that mode piped stdin
# belongs to powershell.exe itself and is never bound to $InputText. Reading
# the redirected console stream directly keeps the DoD smoke test
# (`"text" | powershell -File ...`) and external callers in line with an
# in-process pipeline.
if ([string]::IsNullOrEmpty($text)) {
    try {
        if ([Console]::IsInputRedirected) {
            $piped = [Console]::In.ReadToEnd()
            if (-not [string]::IsNullOrEmpty($piped)) {
                $text = $piped.TrimEnd()
            }
        }
    } catch {
        Write-DebugLine "stdin read failed: $_"
    }
}

# Nothing to say: record the reason (dry-run only) and finish successfully.
if ([string]::IsNullOrEmpty($text)) {
    Write-Trace @{ Spoke = $false; Reason = "empty-input" }
    exit 0
}
149+
150+
# Unit conversions. Pure arithmetic -- no engine calls yet.
#   rate:   (Rate - 1.0) * 10  -> SAPI rate   -10..+10
#   volume: Vol * 100          -> SAPI volume   0..100
# Values are clamped while still doubles and only then rounded and cast, so an
# out-of-range, infinite or NaN setting can no longer overflow the [int]
# conversion (which would raise an error and leave the value unclamped).

# Round a double to the nearest integer within [Minimum, Maximum]; NaN counts as 0.
function ConvertTo-ClampedInt {
    param(
        [double]$Value,
        [int]$Minimum,
        [int]$Maximum
    )
    if ([double]::IsNaN($Value)) { $Value = 0.0 }
    $bounded = [math]::Max([double]$Minimum, [math]::Min([double]$Maximum, $Value))
    return [int][math]::Round($bounded)
}

$sapiRate = ConvertTo-ClampedInt -Value (($Rate - 1.0) * 10) -Minimum (-10) -Maximum 10

$sapiVolume = ConvertTo-ClampedInt -Value ($Vol * 100) -Minimum 0 -Maximum 100
156+
157+
# Voice resolution. The "default" sentinel means "do not call SelectVoice";
# any explicit name is looked up in the installed voices list. A miss
# emits a debug line and falls through to the engine default.
$selectVoiceCalled = $false
$selectedVoice = $null
$installedVoices = @()

if ($script:SpeechLoaded) {
    $probe = $null
    try {
        $probe = [System.Speech.Synthesis.SpeechSynthesizer]::new()
        $installedVoices = @($probe.GetInstalledVoices() | ForEach-Object { $_.VoiceInfo.Name })
    } catch {
        Write-DebugLine "voice probe failed: $_"
    } finally {
        # Release the probe synthesizer even when enumeration throws.
        if ($null -ne $probe) {
            try {
                $probe.Dispose()
            } catch {
                Write-DebugLine "voice probe failed: $_"
            }
        }
    }
}

# $voiceToSelect stays $null unless an explicit, installed voice was requested.
$voiceToSelect = $null
if ($Voice -and $Voice -ne "default") {
    if ($installedVoices -contains $Voice) {
        $voiceToSelect = $Voice
        $selectVoiceCalled = $true
        $selectedVoice = $Voice
    } else {
        Write-DebugLine "voice '$Voice' not installed; using default"
    }
}
184+
185+
# Dry-run: record what would have been spoken, then stop before touching the
# speech engine.
if ($script:DryRun) {
    $traceFields = @{
        Spoke = $true
        Text = $text
        SapiRate = $sapiRate
        SapiVolume = $sapiVolume
        SelectVoiceCalled = $selectVoiceCalled
        SelectedVoice = $selectedVoice
        RequestedVoice = $Voice
    }
    Write-Trace $traceFields
    exit 0
}
197+
198+
if (-not $script:SpeechLoaded) {
    # Nothing more to do -- Add-Type failed, we already logged the reason.
    exit 0
}

# Speak the text with the resolved rate, volume and (optional) voice. Every
# failure is logged via Write-DebugLine and swallowed; the script always exits 0.
$synth = $null
try {
    $synth = [System.Speech.Synthesis.SpeechSynthesizer]::new()
    $synth.Rate = $sapiRate
    $synth.Volume = $sapiVolume

    if ($voiceToSelect) {
        try {
            $synth.SelectVoice($voiceToSelect)
        } catch {
            # A failed selection is not fatal: keep the engine default voice.
            Write-DebugLine "SelectVoice('$voiceToSelect') failed: $_"
        }
    }

    $synth.Speak($text)
} catch {
    Write-DebugLine "SAPI5 synthesis failed: $_"
    # Do not propagate -- hook must not fail on TTS errors.
} finally {
    # Release the synthesizer even when configuration or Speak throws.
    if ($null -ne $synth) {
        try {
            $synth.Dispose()
        } catch {
            Write-DebugLine "SAPI5 synthesis failed: $_"
        }
    }
}

exit 0
224+
}

0 commit comments

Comments
 (0)