Skip to content

eSpeak on Windows and Mac #4

@willwade

Description

@willwade

eSpeak has been dead for me on Windows and Mac. In my current fork I've fixed it for Mac. There were two things stopping it on Mac: 1. When building it from scratch, the normal DLL file can't be found, so I updated that (I also refactored the DLL-finding logic, as it was annoying me); and 2. It had no way to play the temp file. aplay is a Linux-only thing, and the ffmpeg line was never called, so we needed to output to the OS-native alternative (afplay on Mac).

Now on Windows it's still broken, and I can't figure out why. The code below works (it's standalone), but I haven't yet figured out why it hangs when I integrate the onSynth logic into the main code.

import ctypes
import os
import platform
import subprocess
import wave
from ctypes import cdll, CFUNCTYPE, POINTER, Structure, c_char_p, c_int, c_short, c_uint, c_void_p, cast
from tempfile import NamedTemporaryFile

# Conditional import for winsound on Windows
# (the module is only referenced by the Windows playback branch of
# synth_callback, so other platforms never need it)
if platform.system() == 'Windows':
    import winsound

# Handle to the loaded eSpeak shared library; stays None until
# load_library() succeeds and assigns it.
dll = None

def load_library():
    """Try each known eSpeak library location and keep the first that loads.

    On success the loaded handle is stored in the module-level ``dll``.
    Every attempt, successful or not, is reported on stdout.

    Returns:
        True if one of the candidate libraries was loaded, False otherwise.
    """
    global dll

    # Candidates are tried strictly in this order; the first hit wins.
    candidates = (
        # macOS paths
        '/usr/local/lib/libespeak-ng.1.dylib',
        '/usr/local/lib/libespeak.dylib',
        # Linux paths
        'libespeak-ng.so.1',
        '/usr/local/lib/libespeak-ng.so.1',
        'libespeak.so.1',
        # Windows paths
        r'C:\Program Files\eSpeak NG\libespeak-ng.dll',
        r'C:\Program Files (x86)\eSpeak NG\libespeak-ng.dll',
    )

    for path in candidates:
        try:
            handle = cdll.LoadLibrary(path)
        except Exception as e:
            # A failed candidate is not fatal; report it and move on.
            print(f"Failed to load: {path}, Exception: {str(e)}")
        else:
            dll = handle
            print(f"Successfully loaded: {path}")
            return True

    return False

class _EspeakEventId(ctypes.Union):
    """Trailing ``id`` union of the C ``espeak_EVENT`` struct."""

    _fields_ = [
        ("number", c_int),                # WORD and SENTENCE events
        ("name", c_char_p),               # MARK and PLAY events (UTF-8 string)
        ("string", ctypes.c_char * 8),    # phoneme names (UTF-8)
    ]


class ESPEAK_EVENT(Structure):
    """ctypes mirror of the ``espeak_EVENT`` struct from eSpeak's speak_lib.h.

    The field order and types must match the C declaration exactly. The
    trailing ``id`` union is part of the C struct; without it the Python
    structure is shorter than the native one, so walking the event array
    handed to the synth callback (``events[i]``) would use the wrong stride.
    """

    _fields_ = [
        ("type", c_int),                  # event type (C enum)
        ("unique_identifier", c_uint),    # message identifier
        ("text_position", c_int),         # characters from the start of the text
        ("length", c_int),                # word length in characters
        ("audio_position", c_int),        # time in ms within the generated audio
        ("sample", c_int),                # sample id (internal use)
        ("user_data", c_void_p),          # pointer supplied by the caller
        ("id", _EspeakEventId),           # event-specific payload
    ]

# Define the synthesis callback function
def synth_callback(wav, numsamples, events):
    """Write one chunk of synthesized audio to a temp WAV file and play it.

    Intended to be registered with eSpeak as its synthesis callback. The
    chunk is written as mono 16-bit PCM at 22050 Hz, played with the
    platform's player (afplay on macOS, aplay on Linux, winsound on
    Windows) and the temporary file is removed afterwards.

    Args:
        wav: pointer to ``numsamples`` 16-bit samples; may be null/None.
        numsamples: number of samples available behind ``wav``.
        events: event list supplied by eSpeak; not inspected here.

    Returns:
        Always 0. Errors during writing or playback are reported on stdout
        rather than raised, because this function is called from native code.
    """
    print("Synthesis callback called")

    if not wav or numsamples <= 0:
        print("No samples to process")
        return 0  # Return 0 to indicate success

    # delete=False: the player needs to reopen the file by name after we
    # close it, so removal is done explicitly in the outer ``finally``.
    stream = NamedTemporaryFile(delete=False, suffix='.wav')

    try:
        try:
            with wave.open(stream, 'wb') as f:
                f.setnchannels(1)
                f.setsampwidth(2)
                f.setframerate(22050)
                # Copy the whole sample buffer in one step instead of
                # converting sample by sample in Python.
                samples = cast(wav, POINTER(c_short * numsamples)).contents
                f.writeframes(bytes(samples))
        finally:
            # Always release the handle, even if writing failed; an open
            # handle would block the os.remove() below on Windows.
            stream.close()
        print(f"Temporary WAV file created at: {stream.name}")

        system = platform.system()
        if system == 'Darwin':  # macOS
            # Argument lists avoid the shell, so temp paths containing
            # spaces or shell metacharacters are passed through intact.
            subprocess.run(['afplay', stream.name], check=False)
        elif system == 'Linux':
            subprocess.run(['aplay', stream.name, '-q'], check=False)
        elif system == 'Windows':
            print(f"Playing sound on Windows... {stream.name}")
            winsound.PlaySound(stream.name, winsound.SND_FILENAME)
        else:
            raise RuntimeError("Unsupported operating system for audio playback")

    except Exception as e:
        # Broad on purpose: this is the boundary with native code, so any
        # failure (including a missing player binary) is reported, not raised.
        print(f"Error during playback: {e}")

    finally:
        try:
            os.remove(stream.name)
            print(f"Temporary WAV file deleted: {stream.name}")
        except Exception as e:
            print(f"Error deleting temporary WAV file: {e}")

    return 0  # Return 0 to indicate success

def main():
    """Load eSpeak, speak a fixed test sentence via synth_callback, and wait.

    Raises:
        RuntimeError: if the library cannot be loaded, or if initialization,
            voice selection, synthesis or synchronization reports failure.
    """
    if not load_library():
        raise RuntimeError("This means you probably do not have eSpeak or eSpeak-ng installed!")

    # Initialize eSpeak
    # NOTE(review): speak_lib.h describes the second argument as a buffer
    # length in ms rather than a sample rate. The value is left unchanged
    # because it affects how much audio each callback receives - confirm intent.
    dll.espeak_Initialize.restype = c_int
    if dll.espeak_Initialize(c_int(1), c_int(22050), c_void_p(0), c_int(0)) == -1:
        raise RuntimeError("Failed to initialize eSpeak")
    print("eSpeak initialized")

    # Set the voice
    dll.espeak_SetVoiceByName.restype = c_int
    if dll.espeak_SetVoiceByName(c_char_p(b'en')) != 0:
        raise RuntimeError("Failed to set voice")
    print("Voice set")

    # Define the synthesis callback
    # The wrapper object must stay referenced for as long as eSpeak may call
    # it; the local ``callback`` keeps it alive until this function returns.
    CALLBACK = CFUNCTYPE(c_int, POINTER(c_short), c_int, POINTER(ESPEAK_EVENT))
    callback = CALLBACK(synth_callback)

    # Set the callback function
    dll.espeak_SetSynthCallback(callback)
    print("Synthesis callback set")

    # Send text to eSpeak
    text = "Hello World, this is a test."
    # The size argument is a byte count (size_t) of the text buffer, so it
    # is measured on the encoded bytes plus the terminating NUL, not on the
    # character count, which is smaller for non-ASCII text.
    encoded = text.encode('utf-8')
    dll.espeak_Synth.restype = c_int
    status = dll.espeak_Synth(
        c_char_p(encoded),
        ctypes.c_size_t(len(encoded) + 1),
        c_int(0),
        c_int(0),
        c_int(0),
        c_uint(0),
        c_void_p(0),
        c_void_p(0),
    )
    if status != 0:
        raise RuntimeError("Failed to synthesize text")
    print("Text synthesized")

    # Wait for synthesis to complete
    # The call returns an integer status code, so declare it as such and
    # check it like the other eSpeak calls above.
    dll.espeak_Synchronize.restype = c_int
    if dll.espeak_Synchronize() != 0:
        raise RuntimeError("Failed to synchronize synthesis")
    print("Synthesis synchronized")

if __name__ == "__main__":
    # Script entry point: run the demo, echo any failure on stdout, then
    # re-raise so the traceback and non-zero exit status are preserved.
    try:
        main()
    except Exception as failure:
        print("Exception: " + str(failure) + "\n")
        raise

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions