askrella · luandro · Jan 27, 2024
diff --git a/.env-example b/.env-example
@@ -84,8 +84,15 @@ TTS_ENABLED=false
 # Defines if the bot should use the Speech API or AWS Polly to convert text to speech
 # "speech-api" = It will use our Speech API to transcribe your voice messages
 # "aws-polly" = It will use AWS Polly to convert text to speech
+# "piper-tts" = It will use Piper TTS to convert text to speech
 TTS_MODE=speech-api
 
+# Piper TTS Config
+# Piper is a fast, local text-to-speech system
+# Check how to install here: https://github.com/rhasspy/piper
+# Voice models can be downloaded here: https://huggingface.co/rhasspy/piper-voices/tree/v1.0.0
+PIPER_TTS_MODEL=
+PIPER_TTS_COMMAND=piper-tts
 # AWS Config
 # You can use AWS Polly to convert text to speech
 # You need to have an AWS account and create an IAM user with Polly permissions

diff --git a/src/config.ts b/src/config.ts
@@ -50,6 +50,8 @@ interface IConfig {
 	whisperApiKey: string;
 	ttsEnabled: boolean;
 	ttsMode: TTSMode;
+	piperTTSModel: string;
+	piperTTSCommand: string;
 	transcriptionEnabled: boolean;
 	transcriptionMode: TranscriptionMode;
 	transcriptionLanguage: string;
@@ -98,7 +100,8 @@ export const config: IConfig = {
 	// Text-to-Speech
 	ttsEnabled: getEnvBooleanWithDefault("TTS_ENABLED", false), // Default: false
 	ttsMode: getEnvTTSMode(), // Default: speech-api
-
+	piperTTSModel: getEnvPiperTTSModel(), // Default: ""
+	piperTTSCommand: getEnvPiperTTSCommand(), // Default: piper-tts
 	// Transcription
 	transcriptionEnabled: getEnvBooleanWithDefault("TRANSCRIPTION_ENABLED", false), // Default: false
 	transcriptionMode: getEnvTranscriptionMode(), // Default: local
@@ -185,4 +188,32 @@ function getEnvAWSPollyVoiceEngine(): AWSPollyEngine {
 	return envValue as AWSPollyEngine;
 }
 
+/**
+ * Get the Piper TTS model path from the PIPER_TTS_MODEL environment variable.
+ *
+ * No usable default exists (voice models have to be downloaded separately), so an
+ * unset or empty variable yields "" rather than a made-up placeholder path.
+ * This matches the `// Default: ""` note on `piperTTSModel` in the config object.
+ *
+ * @returns The configured model path, or "" when none is configured
+ */
+function getEnvPiperTTSModel(): string {
+	// `??` only replaces undefined; an empty value is already "".
+	return process.env.PIPER_TTS_MODEL ?? "";
+}
+
+/**
+ * Resolve the command used to invoke Piper TTS.
+ *
+ * Reads the PIPER_TTS_COMMAND environment variable; when it is unset or
+ * empty, the literal "piper-tts" is used instead.
+ *
+ * @returns The command string for Piper TTS
+ */
+function getEnvPiperTTSCommand(): string {
+	const fallbackCommand = "piper-tts";
+	const configured = process.env.PIPER_TTS_COMMAND;
+	return configured == undefined || configured == "" ? fallbackCommand : configured;
+}
+
 export default config;
diff --git a/src/handlers/gpt.ts b/src/handlers/gpt.ts
@@ -12,6 +12,8 @@ import { ChatMessage } from "chatgpt";
 // TTS
 import { ttsRequest as speechTTSRequest } from "../providers/speech";
 import { ttsRequest as awsTTSRequest } from "../providers/aws";
+import { ttsRun as piperTTS } from "../providers/piper-tts";
+
 import { TTSMode } from "../types/tts-mode";
 
 // Moderation
@@ -97,6 +99,9 @@ async function sendVoiceMessageReply(message: Message, gptTextResponse: string)
 	var ttsRequest = async function (): Promise<Buffer | null> {
 		return await speechTTSRequest(gptTextResponse);
 	};
+	// Piper writes the audio file itself, so this variant takes the target path and yields no buffer.
+	var ttsRun = async (tempFilePath: string): Promise<void> =>
+		piperTTS(gptTextResponse, tempFilePath);
 
 	switch (config.ttsMode) {
 		case TTSMode.SpeechAPI:
@@ -112,7 +117,9 @@ async function sendVoiceMessageReply(message: Message, gptTextResponse: string)
 				return await awsTTSRequest(gptTextResponse);
 			};
 			break;
-
+		case TTSMode.PiperTTS:
+			logTAG = "[PiperTTS]";
+			break;
 		default:
 			logTAG = "[SpeechAPI]";
 			ttsRequest = async function (): Promise<Buffer | null> {
@@ -123,25 +130,31 @@ async function sendVoiceMessageReply(message: Message, gptTextResponse: string)
 
 	// Get audio buffer
 	cli.print(`${logTAG} Generating audio from GPT response "${gptTextResponse}"...`);
-	const audioBuffer = await ttsRequest();
-
-	// Check if audio buffer is valid
-	if (audioBuffer == null || audioBuffer.length == 0) {
-		message.reply(`${logTAG} couldn't generate audio, please contact the administrator.`);
-		return;
-	}
-
-	cli.print(`${logTAG} Audio generated!`);
-
+	let messageMedia;
 	// Get temp folder and file path
 	const tempFolder = os.tmpdir();
 	const tempFilePath = path.join(tempFolder, randomUUID() + ".opus");
 
-	// Save buffer to temp file
-	fs.writeFileSync(tempFilePath, audioBuffer);
+	if (config.ttsMode === TTSMode.PiperTTS) {
+		await ttsRun(tempFilePath);
+		messageMedia = await MessageMedia.fromFilePath(tempFilePath);
+	} else {
+		const audioBuffer = await ttsRequest();
+
+		// Check if audio buffer is valid
+		if (audioBuffer == null || audioBuffer.length == 0) {
+			message.reply(`${logTAG} couldn't generate audio, please contact the administrator.`);
+			return;
+		}
 
-	// Send audio
-	const messageMedia = new MessageMedia("audio/ogg; codecs=opus", audioBuffer.toString("base64"));
+		cli.print(`${logTAG} Audio generated!`);
+
+		// Save buffer to temp file
+		fs.writeFileSync(tempFilePath, audioBuffer);
+
+		// Send audio
+		messageMedia = new MessageMedia("audio/ogg; codecs=opus", audioBuffer.toString("base64"));
+	}
 	message.reply(messageMedia);
 
 	// Delete temp file

diff --git a/src/providers/piper-tts.ts b/src/providers/piper-tts.ts
@@ -0,0 +1,17 @@
+import { execSync } from "child_process";
+import config from "../config";
+
+/**
+ * Synthesizes `text` with the configured Piper command/model and has ffmpeg encode the raw PCM as Opus into `tempFilePath`.
+ * The text is written to the pipeline's stdin, never spliced into the shell string, so quotes, `$(...)` or backticks in it cannot run as shell code. Failures are logged, not thrown.
+ */
+async function ttsRun(text: string, tempFilePath: string): Promise<void> {
+	const pipeline = `${config.piperTTSCommand} -m ${config.piperTTSModel} --output-raw | ffmpeg -f s16le -ar 22050 -i pipe: -y -c:a libopus -b:a 17k ${tempFilePath}`;
+	try {
+		execSync(pipeline, { input: `${text}\n` }); // trailing newline mirrors what `echo` used to append
+	} catch (error) {
+		console.error("An error occurred (TTS request)", error);
+	}
+}
+
+export { ttsRun };
diff --git a/src/types/tts-mode.ts b/src/types/tts-mode.ts
@@ -1,4 +1,5 @@
 export enum TTSMode {
 	SpeechAPI = "speech-api",
-	AWSPolly = "aws-polly"
+	AWSPolly = "aws-polly", // AWS Polly text-to-speech
+	PiperTTS = "piper-tts" // Piper TTS, invoked as a local command by providers/piper-tts.ts
 }