-
Notifications
You must be signed in to change notification settings - Fork 94
/
Copy pathindex.ts
110 lines (95 loc) · 3.3 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import { HumeClient } from "hume"
import fs from "fs/promises"
import path from "path"
import * as os from "os"
import * as child_process from "child_process"
import dotenv from "dotenv"
// Load variables from a local `.env` file into `process.env` before they are read below.
dotenv.config()

// Hume API client shared by every request in this script.
// The key is read from the HUME_API_KEY environment variable; the `!` only silences the
// type checker, so an unset variable is passed through to the client as `undefined`.
const hume = new HumeClient({ apiKey: process.env.HUME_API_KEY! })

// Per-run output directory path: a timestamped folder under the OS temp directory.
// The directory itself is not created here.
const outputDir = path.join(os.tmpdir(), "hume-audio-" + Date.now())
/**
 * Decodes a base64-encoded audio payload and saves it in the script's output directory.
 *
 * The file is written to `<outputDir>/<filename>.wav` and the resulting path is logged.
 *
 * NOTE(review): the `.wav` extension is applied unconditionally — confirm it matches the
 * audio format the API actually returns for these requests.
 *
 * @param base64EncodedAudio - Audio bytes encoded as a base64 string.
 * @param filename - Base name of the output file, without extension.
 * @returns A promise that resolves once the file has been written and its path logged.
 */
const writeResultToFile = async (base64EncodedAudio: string, filename: string) => {
  const audioBytes = Buffer.from(base64EncodedAudio, "base64")
  const destination = path.join(outputDir, filename + ".wav")
  await fs.writeFile(destination, audioBytes)
  console.log('Wrote', destination)
}
/**
 * Spawns `ffplay` as a detached child process that plays the audio written to its stdin.
 *
 * ffplay is started with no display window (`-nodisp`), exits when its input ends
 * (`-autoexit`), does not limit its input buffer (`-infbuf`) and reads from stdin (`-i -`).
 * Its stdout and stderr are ignored.
 *
 * Failures are logged instead of thrown, so a missing or crashed player never aborts the
 * caller: spawn errors are reported through the child's `error` event, and write errors
 * (for example EPIPE when ffplay exits early) through the stdin stream's `error` event.
 *
 * @returns An object with two methods:
 *   - `sendAudio(audio)`: decodes the base64 string `audio` and writes the bytes to the
 *     player's stdin. It is a no-op once stdin is no longer writable.
 *   - `stop()`: closes the player's stdin and unrefs the child so it no longer keeps this
 *     process alive.
 */
const startAudioPlayer = () => {
  const proc = child_process.spawn('ffplay', ['-nodisp', '-autoexit', '-infbuf', '-i', '-'], {
    detached: true,
    stdio: ['pipe', 'ignore', 'ignore'],
  })

  proc.on('error', (err) => {
    if ('code' in err && err.code === 'ENOENT') {
      console.error('ffplay not found. Please install ffmpeg to play audio.')
    } else {
      // Any other spawn failure (e.g. EACCES) used to be dropped silently.
      console.error('Audio player error:', err)
    }
  })

  // Without a listener, a failed write to the pipe is an unhandled 'error' event on the
  // stream, which would crash the whole script.
  proc.stdin.on('error', (err) => {
    console.error('Failed to send audio to ffplay:', err.message)
  })

  return {
    sendAudio: (audio: string) => {
      // Skip the write when the pipe has been ended, destroyed, or has errored.
      if (!proc.stdin.writable) {
        return
      }
      proc.stdin.write(Buffer.from(audio, "base64"))
    },
    stop: () => {
      proc.stdin.end()
      proc.unref()
    }
  }
}
/**
 * Runs the text-to-speech walkthrough from start to finish.
 *
 * Steps, in order:
 *  1. Creates the output directory.
 *  2. Synthesizes an utterance from a voice description and saves the audio.
 *  3. Saves that generation as a named voice (`aristocrat-<timestamp>`).
 *  4. Synthesizes a second utterance with the saved voice, passing the first generation as
 *     context and requesting two generations; both are saved.
 *  5. Synthesizes a third utterance with the saved voice plus a description, passing the
 *     first generation of step 4 as context, and saves it.
 *  6. Streams three more utterances, passing the step 5 generation as context, and pipes
 *     each audio snippet into the ffplay-based audio player.
 *
 * @returns A promise that resolves when streaming has finished and the player was stopped.
 *   It rejects if creating the directory, any API call, or any file write fails.
 */
const main = async () => {
  await fs.mkdir(outputDir)
  console.log('Writing to', outputDir)

  const speech1 = await hume.tts.synthesizeJson({
    utterances: [{
      description: "A refined, British aristocrat",
      text: "Take an arrow from the quiver."
    }]
  })
  await writeResultToFile(speech1.generations[0].audio, "speech1_0")

  // The timestamp keeps the voice name distinct between runs of this script.
  const name = `aristocrat-${Date.now()}`
  await hume.tts.voices.create({
    name,
    generationId: speech1.generations[0].generationId,
  })

  const speech2 = await hume.tts.synthesizeJson({
    utterances: [{
      voice: { name },
      text: "Now take a bow."
    }],
    context: {
      generationId: speech1.generations[0].generationId
    },
    numGenerations: 2,
  })
  await writeResultToFile(speech2.generations[0].audio, "speech2_0")
  await writeResultToFile(speech2.generations[1].audio, "speech2_1")

  const speech3 = await hume.tts.synthesizeJson({
    utterances: [{
      voice: { name },
      description: "Murmured softly, with a heavy dose of sarcasm and contempt",
      text: "Does he even know how to use that thing?"
    }],
    context: {
      generationId: speech2.generations[0].generationId
    },
    numGenerations: 1
  })
  await writeResultToFile(speech3.generations[0].audio, "speech3_0")

  const audioPlayer = startAudioPlayer()
  try {
    for await (const snippet of await hume.tts.synthesizeJsonStreaming({
      context: {
        generationId: speech3.generations[0].generationId,
      },
      utterances: [{ text: "He's drawn the bow..." }, { text: "he's fired the arrow..." }, { text: "I can't believe it! A perfect bullseye!" }],
      // Uncomment to reduce latency to < 500ms, at a 10% higher cost
      // instantMode: true,
      //
      // By default, the audio data of every chunk returned by `synthesizeJsonStreaming` is a standalone 'mp3' file.
      // The audio player receives all chunks on a single stdin stream, i.e. as if they were one audio file.
      // You can pass the `stripHeaders` option to remove the "headers" from each chunk so that the streamed
      // audio can be played as a single file.
      // TODO: stripHeaders: true
    })) {
      audioPlayer.sendAudio(snippet.audio)
    }
  } finally {
    // Always release the player, even when the request or the stream fails; otherwise the
    // child process and its open stdin pipe can keep this script from exiting.
    audioPlayer.stop()
  }
}
// Script entry point. On failure the error is logged and the exit status is set to 1 so
// shells and CI can detect it; previously a failed run still exited with status 0.
main()
  .then(() => console.log('Done'))
  .catch((err: unknown) => {
    console.error(err)
    process.exitCode = 1
  })