Skip to content

DRAFT: DotNetSpeech Synthesis engine #375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/python_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,28 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install MSBuild and other dependencies on Windows for DotNet
if: runner.os == 'Windows'
shell: pwsh
run: |
choco install dotnetfx -y
choco install netfx-4.8-devpack -y
choco install visualstudio2019-buildtools -y
choco install visualstudio2022-workload-vctools -y
$env:Path += ";C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin"
$env:Path += ";C:\Program Files\Microsoft Visual Studio\2022\BuildTools\MSBuild\Current\Bin"
pip install pycparser

- name: Check .NET Framework Installation Path for System.Speech
if: runner.os == 'Windows'
shell: pwsh
run: |
if (Test-Path "C:\Windows\Microsoft.NET\assembly\GAC_MSIL\System.Speech") {
Write-Output "System.Speech is found in GAC."
} else {
Write-Output "System.Speech is NOT found in GAC."
}

- name: Install dependencies
run: |
pip install --upgrade pip
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pyttsx3\engine.py
pyttsx3\voice.py
pyttsx3\drivers\__init__.py
pyttsx3\drivers\_espeak.py
pyttsx3\drivers\dotnetspeech.py
pyttsx3\drivers\dummy.py
pyttsx3\drivers\espeak.py
pyttsx3\drivers\nsss.py
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dependencies = [
"pyobjc>=2.4; platform_system=='Darwin'",
"pypiwin32; platform_system=='Windows'",
"pywin32; platform_system=='Windows'",
"pythonnet; platform_system=='Windows'",
]

urls = { Homepage = "https://github.com/nateshmbhat/pyttsx3" }
Expand Down
4 changes: 4 additions & 0 deletions pyttsx3/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def _push(self, mtd, args, name=None):
@type name: str
"""
self._queue.append((mtd, args, name))
print(f"[DEBUG] Added to proxy queue: {mtd.__name__} with args: {args}")

self._pump()

def _pump(self):
Expand All @@ -75,6 +77,7 @@ def _pump(self):
driver is not currently busy.
"""
while (not self._busy) and len(self._queue):
print("[DEBUG] Pumping next command from queue.")
cmd = self._queue.pop(0)
self._name = cmd[2]
try:
Expand Down Expand Up @@ -124,6 +127,7 @@ def say(self, text, name):
@param name: Name to associate with the utterance
@type name: str
"""
print("[DEBUG] - DRiver Adding text to queue: {text}")
self._current_text = text
self._push(self._driver.say, (text,), name)

Expand Down
144 changes: 144 additions & 0 deletions pyttsx3/drivers/dotnetspeech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import logging
import queue
import threading
import time

import clr


def get_dotnet_speech_classes():
    """Load the System.Speech assembly from the GAC and return its classes.

    The assembly location is derived from the Windows directory reported by
    the environment rather than being pinned to one drive letter, so the
    lookup also works when Windows is not installed on the C: drive.

    Returns:
        A ``(SpeechSynthesizer, SpeakCompletedEventArgs)`` tuple of the .NET
        types imported from ``System.Speech.Synthesis``.
    """
    import os

    # %SystemRoot% (alias %windir%) points at the Windows directory; fall back
    # to the conventional location when neither variable is set.
    windows_dir = (
        os.environ.get("SystemRoot")
        or os.environ.get("windir")
        or r"C:\Windows"
    )
    assembly_path = os.path.join(
        windows_dir,
        "Microsoft.NET",
        "assembly",
        "GAC_MSIL",
        "System.Speech",
        "v4.0_4.0.0.0__31bf3856ad364e35",
        "System.Speech",
    )
    clr.AddReference(assembly_path)

    # The namespace only becomes importable once the assembly is referenced.
    from System.Speech.Synthesis import SpeechSynthesizer, SpeakCompletedEventArgs

    return SpeechSynthesizer, SpeakCompletedEventArgs


# Resolve the System.Speech classes once, at import time, so the driver class
# below can refer to them as plain module-level names.
SpeechSynthesizer, SpeakCompletedEventArgs = get_dotnet_speech_classes()


def buildDriver(proxy):
    """Create the DotNetSpeech driver instance bound to *proxy*."""
    driver = DotNetSpeech(proxy)
    return driver

class DotNetSpeech:
    """Speech driver built on the .NET ``SpeechSynthesizer`` class.

    Utterances passed to :meth:`say` are placed on an internal queue and
    spoken one at a time with ``SpeakAsync``; the synthesizer's
    ``SpeakCompleted`` event marks the driver idle again so the next queued
    utterance can start.
    """

    _log = logging.getLogger(__name__)

    # Value ranges accepted by SpeechSynthesizer.Rate and .Volume.
    _RATE_BOUNDS = (-10, 10)
    _VOLUME_BOUNDS = (0, 100)

    def __init__(self, proxy):
        """Create the synthesizer and subscribe to its completion event.

        Args:
            proxy: Object used to report state; this class calls its
                ``setBusy`` and ``notify`` methods.
        """
        self._log.debug("Initializing DotNetSpeech driver...")
        self._synthesizer = SpeechSynthesizer()
        self._proxy = proxy
        self._queue = queue.Queue()
        self._speaking = False
        self._looping = False
        self._stop_requested = False
        self._rate = 1.0
        self._volume = 1.0
        self._current_voice = None
        self._synthesizer.SpeakCompleted += self._on_speak_completed
        self._log.debug("DotNetSpeech driver initialized.")

    def say(self, text):
        """Queue *text* to be spoken by the processing loop.

        The proxy is not marked busy here; that happens in
        :meth:`_start_processing` when the utterance actually starts.
        """
        self._queue.put(("say", text))
        self._log.debug(
            "Queued utterance (%d pending): %r", self._queue.qsize(), text
        )

    def _on_speak_completed(self, sender, event_args):
        """Handle the synthesizer's ``SpeakCompleted`` event.

        A cancelled utterance is ignored here because :meth:`stop` has
        already reset the driver state and reported the interruption.
        """
        if getattr(event_args, "Cancelled", False):
            self._log.debug("Speech cancelled.")
            return
        self._speaking = False
        self._log.debug("Speech completed.")
        # NOTE(review): notifying before releasing the proxy, on the
        # assumption that setBusy(False) may let the proxy move on to its
        # next command -- confirm against DriverProxy.setBusy.
        self._proxy.notify("finished-utterance", completed=True)
        self._proxy.setBusy(False)

    def _start_processing(self):
        """Start the next queued utterance if nothing is being spoken."""
        if self._speaking:
            return
        try:
            action, text = self._queue.get_nowait()
        except queue.Empty:
            return
        if action != "say":
            # Leave the driver idle rather than stuck in the speaking state.
            self._log.warning("Ignoring unknown queued action: %r", action)
            return

        self._log.debug("Executing action: %s with text: %r", action, text)
        self._speaking = True
        self._stop_requested = False
        self._proxy.setBusy(True)
        try:
            # SpeakAsync returns immediately, so no helper thread is needed.
            self._synthesizer.SpeakAsync(text)
        except Exception:
            # No completion event will arrive; undo the busy state.
            self._speaking = False
            self._proxy.setBusy(False)
            raise

    def startLoop(self):
        """Process the queue repeatedly until :meth:`endLoop` is called."""
        # NOTE(review): nothing here marks the proxy idle before looping;
        # confirm that commands queued on the proxy actually reach say().
        self._looping = True
        self._log.debug("Starting loop in DotNetSpeech...")
        while self._looping:
            self._start_processing()
            time.sleep(0.1)
        self._log.debug("Loop ended in DotNetSpeech.")

    def endLoop(self):
        """Ask the running loop to exit."""
        self._log.debug("Ending loop...")
        self._looping = False

    def runAndWait(self):
        """Speak everything queued so far, then return.

        Unlike :meth:`startLoop`, this loop ends on its own as soon as the
        queue is empty and no utterance is in progress; it also ends early
        if :meth:`endLoop` is called.
        """
        self._log.debug("Running and waiting for all commands to complete...")
        self._looping = True
        try:
            while self._looping:
                self._start_processing()
                if not self._speaking and self._queue.empty():
                    break
                time.sleep(0.1)
        finally:
            self._looping = False

    def stop(self):
        """Cancel current speech, drop queued utterances and report it."""
        self._stop_requested = True
        self._synthesizer.SpeakAsyncCancelAll()
        # Clear the underlying deque under the queue's own lock.
        with self._queue.mutex:
            self._queue.queue.clear()
        self._speaking = False
        # Same notification shape as _on_speak_completed, flagged incomplete.
        self._proxy.notify("finished-utterance", completed=False)
        self._proxy.setBusy(False)

    def save_to_file(self, text, filename):
        """Synthesize *text* synchronously into the wave file *filename*."""
        self._synthesizer.SetOutputToWaveFile(filename)
        try:
            self._synthesizer.Speak(text)
        finally:
            # Always restore audio output, even when synthesis fails.
            self._synthesizer.SetOutputToDefaultAudioDevice()

    @staticmethod
    def _scaled(value, factor, bounds):
        """Return ``value * factor`` rounded and clamped into *bounds*."""
        lowest, highest = bounds
        return max(lowest, min(highest, round(value * factor)))

    def _voice_name_for(self, value):
        """Map an installed voice's id or name to its name.

        Returns *value* unchanged when no installed voice matches, leaving
        it to the synthesizer to accept or reject.
        """
        for installed in self._synthesizer.GetInstalledVoices():
            info = installed.VoiceInfo
            if value in (info.Id, info.Name):
                return info.Name
        return value

    def setProperty(self, name, value):
        """Set the ``rate``, ``volume`` or ``voice`` property.

        ``rate`` is multiplied by 10 and ``volume`` by 100, then rounded and
        clamped to the synthesizer's accepted range. ``voice`` may be either
        the id or the name of an installed voice.

        Raises:
            KeyError: If *name* is not a known property.
        """
        if name == "rate":
            # NOTE(review): assumes callers use a -1.0..1.0 scale (the
            # default is 1.0) -- confirm against the engine's rate units.
            self._synthesizer.Rate = self._scaled(value, 10, self._RATE_BOUNDS)
            self._rate = value
        elif name == "volume":
            self._synthesizer.Volume = self._scaled(
                value, 100, self._VOLUME_BOUNDS
            )
            self._volume = value
        elif name == "voice":
            # SelectVoice takes a voice name, while the Voice objects built
            # by getProperty("voices") expose the voice id.
            self._synthesizer.SelectVoice(self._voice_name_for(value))
            self._current_voice = value
        else:
            raise KeyError(f"Unknown property '{name}'")

    def getProperty(self, name):
        """Return the ``rate``, ``volume``, ``voice`` or ``voices`` property.

        ``rate``, ``volume`` and ``voice`` return the value last passed to
        :meth:`setProperty` (or the initial default); ``voices`` returns one
        ``Voice`` object per installed voice.

        Raises:
            KeyError: If *name* is not a known property.
        """
        if name == "rate":
            return self._rate
        if name == "volume":
            return self._volume
        if name == "voice":
            return self._current_voice
        if name == "voices":
            # Imported here because the module never imports Voice itself.
            from ..voice import Voice

            voices = []
            for installed in self._synthesizer.GetInstalledVoices():
                info = installed.VoiceInfo
                voices.append(
                    Voice(
                        id=info.Id,
                        name=info.Name,
                        languages=[info.Culture.Name],
                        gender=info.Gender.ToString(),
                        age=info.Age.ToString(),
                    )
                )
            return voices
        raise KeyError(f"Unknown property '{name}'")

4 changes: 3 additions & 1 deletion pyttsx3/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# The first engine in the value tuple is the default engine for that platform.
_engines_by_sys_platform = {
"darwin": ("nsss", "espeak"), # NSSpeechSynthesizer (deprecated)
"win32": ("sapi5", "espeak"),
"win32": ("sapi5", "dotnetspeech", "espeak"),
}


Expand Down Expand Up @@ -139,6 +139,8 @@ def say(self, text: str | None, name: str | None = None):
@type name: str
"""
if str(text or "").strip():
print(f"[DEBUG] engine - Adding utterance to speak: {text}")
print(f"[DEBUG] DotNetSpeech instance ID: {id(self)}")
self.proxy.say(text, name)
else:
return "Argument value can't be None or empty"
Expand Down
19 changes: 19 additions & 0 deletions test2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pyttsx3
import time
import logging

# Manual smoke test: initialize the "dotnetspeech" engine and queue one phrase.
logging.basicConfig(level=logging.DEBUG)

try:
    engine = pyttsx3.init("dotnetspeech")
    print("[DEBUG] Engine initialized.")
except Exception as init_error:
    print(f"[ERROR] Exception during initialization: {init_error}")
else:
    # Only say() is exercised; the engine.runAndWait() call is left disabled
    # in this script.
    try:
        engine.say("The quick brown fox jumped over the lazy dog.")
        print("[DEBUG] Say method executed.")
    except Exception as say_error:
        print(f"[ERROR] Exception in processing: {say_error}")
Loading