Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env-example
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,15 @@ TTS_ENABLED=false
# Defines if the bot should use the Speech API, AWS Polly or Piper TTS to convert text to speech
# "speech-api" = It will use our Speech API to convert text to speech
# "aws-polly" = It will use AWS Polly to convert text to speech
# "piper-tts" = It will use Piper TTS to convert text to speech
TTS_MODE=speech-api

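# Piper TTS Config
# Piper is a fast, local text-to-speech system
# Installation instructions: https://github.com/rhasspy/piper
# Voice models can be downloaded here: https://huggingface.co/rhasspy/piper-voices/tree/v1.0.0
# PIPER_TTS_MODEL = Path to the voice model file (.onnx) passed to Piper
# PIPER_TTS_COMMAND = Command used to run Piper (default: piper-tts)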
PIPER_TTS_MODEL=
PIPER_TTS_COMMAND=piper-tts
# AWS Config
# You can use AWS Polly to convert text to speech
# You need to have an AWS account and create an IAM user with Polly permissions
Expand Down
33 changes: 32 additions & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ interface IConfig {
whisperApiKey: string;
ttsEnabled: boolean;
ttsMode: TTSMode;
piperTTSModel: string;
piperTTSCommand: string;
transcriptionEnabled: boolean;
transcriptionMode: TranscriptionMode;
transcriptionLanguage: string;
Expand Down Expand Up @@ -98,7 +100,8 @@ export const config: IConfig = {
// Text-to-Speech
ttsEnabled: getEnvBooleanWithDefault("TTS_ENABLED", false), // Default: false
ttsMode: getEnvTTSMode(), // Default: speech-api

piperTTSModel: getEnvPiperTTSModel(), // Default: ""
piperTTSCommand: getEnvPiperTTSCommand(), // Default: piper-tts
// Transcription
transcriptionEnabled: getEnvBooleanWithDefault("TRANSCRIPTION_ENABLED", false), // Default: false
transcriptionMode: getEnvTranscriptionMode(), // Default: local
Expand Down Expand Up @@ -185,4 +188,32 @@ function getEnvAWSPollyVoiceEngine(): AWSPollyEngine {
return envValue as AWSPollyEngine;
}

/**
 * Resolve the Piper TTS voice model location from the `PIPER_TTS_MODEL` environment variable.
 *
 * NOTE(review): the fallback below looks like a placeholder rather than a real
 * model location — confirm whether a real default (or failing fast) is intended.
 *
 * @returns The configured value, or the built-in fallback path when the variable is unset or empty
 */
function getEnvPiperTTSModel(): string {
	const fallbackModelPath = "/path/to/default/piper/model.onnx";
	const configuredModel = process.env.PIPER_TTS_MODEL;

	// An unset variable and an empty string are both treated as "not configured"
	return configuredModel ? configuredModel : fallbackModelPath;
}

/**
 * Resolve the command used to launch Piper from the `PIPER_TTS_COMMAND` environment variable.
 *
 * @returns The configured command, or `piper-tts` when the variable is unset or empty
 */
function getEnvPiperTTSCommand(): string {
	const fallbackCommand = "piper-tts";
	const configuredCommand = process.env.PIPER_TTS_COMMAND;

	// An unset variable and an empty string are both treated as "not configured"
	return configuredCommand ? configuredCommand : fallbackCommand;
}

export default config;
43 changes: 28 additions & 15 deletions src/handlers/gpt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import { ChatMessage } from "chatgpt";
// TTS
import { ttsRequest as speechTTSRequest } from "../providers/speech";
import { ttsRequest as awsTTSRequest } from "../providers/aws";
import { ttsRun as piperTTS } from "../providers/piper-tts";

import { TTSMode } from "../types/tts-mode";

// Moderation
Expand Down Expand Up @@ -97,6 +99,9 @@ async function sendVoiceMessageReply(message: Message, gptTextResponse: string)
var ttsRequest = async function (): Promise<Buffer | null> {
return await speechTTSRequest(gptTextResponse);
};
// Piper is handed an output path to write to instead of returning a buffer,
// so it gets its own runner that receives that path.
var ttsRun = async function (tempFilePath: string): Promise<void> {
	// Await the provider so its completion (or rejection) is tied to this promise
	await piperTTS(gptTextResponse, tempFilePath);
};

switch (config.ttsMode) {
case TTSMode.SpeechAPI:
Expand All @@ -112,7 +117,9 @@ async function sendVoiceMessageReply(message: Message, gptTextResponse: string)
return await awsTTSRequest(gptTextResponse);
};
break;

case TTSMode.PiperTTS:
logTAG = "[PiperTTS]";
break;
default:
logTAG = "[SpeechAPI]";
ttsRequest = async function (): Promise<Buffer | null> {
Expand All @@ -123,25 +130,31 @@ async function sendVoiceMessageReply(message: Message, gptTextResponse: string)

// Get audio buffer
cli.print(`${logTAG} Generating audio from GPT response "${gptTextResponse}"...`);
const audioBuffer = await ttsRequest();

// Check if audio buffer is valid
if (audioBuffer == null || audioBuffer.length == 0) {
message.reply(`${logTAG} couldn't generate audio, please contact the administrator.`);
return;
}

cli.print(`${logTAG} Audio generated!`);

let messageMedia;
// Get temp folder and file path
const tempFolder = os.tmpdir();
const tempFilePath = path.join(tempFolder, randomUUID() + ".opus");

// Save buffer to temp file
fs.writeFileSync(tempFilePath, audioBuffer);
if (config.ttsMode === TTSMode.PiperTTS) {
await ttsRun(tempFilePath);
messageMedia = await MessageMedia.fromFilePath(tempFilePath);
} else {
const audioBuffer = await ttsRequest();

// Check if audio buffer is valid
if (audioBuffer == null || audioBuffer.length == 0) {
message.reply(`${logTAG} couldn't generate audio, please contact the administrator.`);
return;
}

// Send audio
const messageMedia = new MessageMedia("audio/ogg; codecs=opus", audioBuffer.toString("base64"));
cli.print(`${logTAG} Audio generated!`);

// Save buffer to temp file
fs.writeFileSync(tempFilePath, audioBuffer);

// Send audio
messageMedia = new MessageMedia("audio/ogg; codecs=opus", audioBuffer.toString("base64"));
}
message.reply(messageMedia);

// Delete temp file
Expand Down
17 changes: 17 additions & 0 deletions src/providers/piper-tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { execSync } from "child_process";
import config from "../config";

/**
 * Synthesize `text` with Piper and encode the result as an Opus file.
 *
 * Runs the configured Piper command with the configured model, pipes its raw
 * output (read by ffmpeg as signed 16-bit little-endian PCM at 22050 Hz) into
 * ffmpeg, and writes the libopus-encoded audio to `tempFilePath`.
 *
 * The call blocks until the whole pipeline exits (`execSync`). Failures are
 * logged and not rethrown, so callers should verify the output file exists.
 *
 * @param text - Text to speak; delivered to Piper on stdin, never placed on the command line
 * @param tempFilePath - Destination path of the generated audio file
 */
async function ttsRun(text: string, tempFilePath: string): Promise<void> {
	const modelFile = config.piperTTSModel;
	const command = config.piperTTSCommand;
	try {
		// The text is untrusted model output: interpolating it into the shell
		// command would let quotes, `$(...)` or backticks run arbitrary commands.
		// Feed it through stdin instead; the trailing newline mirrors what `echo` emitted.
		// The output path is quoted so temp directories containing spaces still work.
		execSync(
			`${command} -m ${modelFile} --output-raw | ffmpeg -f s16le -ar 22050 -i pipe: -y -c:a libopus -b:a 17k "${tempFilePath}"`,
			{ input: `${text}\n` }
		);
	} catch (error) {
		console.error("An error occurred (TTS request)", error);
	}
}

export { ttsRun };
3 changes: 2 additions & 1 deletion src/types/tts-mode.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/**
 * Text-to-speech backends the bot can use.
 * The string values mirror the `TTS_MODE` options listed in `.env-example`.
 */
export enum TTSMode {
	SpeechAPI = "speech-api",
	AWSPolly = "aws-polly",
	PiperTTS = "piper-tts"
}