Skip to content

Commit bc3322e

Browse files
authored
Support Kokoro TTS for HarmonyOS. (#1743)
1 parent 5bcd7e1 commit bc3322e

File tree

5 files changed

+82
-35
lines changed

5 files changed

+82
-35
lines changed

harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* Use these variables when you tailor your ArkTS code. They must be of the const type.
33
*/
4-
export const HAR_VERSION = '1.10.37';
4+
export const HAR_VERSION = '1.10.40';
55
export const BUILD_MODE_NAME = 'debug';
66
export const DEBUG = true;
77
export const TARGET_NAME = 'default';

harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ export { OnlineStream,
3131
OnlineRecognizer,
3232
} from './src/main/ets/components/StreamingAsr';
3333

34-
export { OfflineTtsMatchaModelConfig,
34+
export { OfflineTtsKokoroModelConfig,
35+
OfflineTtsMatchaModelConfig,
3536
OfflineTtsVitsModelConfig,
3637
OfflineTtsModelConfig,
3738
OfflineTtsConfig,

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets

+9
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,18 @@ export class OfflineTtsMatchaModelConfig {
2828
public lengthScale: number = 1.0;
2929
}
3030

31+
export class OfflineTtsKokoroModelConfig {
32+
public model: string = '';
33+
public voices: string = '';
34+
public tokens: string = '';
35+
public dataDir: string = '';
36+
public lengthScale: number = 1.0;
37+
}
38+
3139
export class OfflineTtsModelConfig {
3240
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
3341
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
42+
public kokoro: OfflineTtsKokoroModelConfig = new OfflineTtsKokoroModelConfig();
3443
public numThreads: number = 1;
3544
public debug: boolean = false;
3645
public provider: string = 'cpu';

harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets

+3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ struct Index {
6666
@State initTtsDone: boolean = false;
6767
@State ttsGeneratedDone: boolean = true;
6868
@State numSpeakers: number = 1;
69+
@State numThreads: number = 1;
6970
@State initAudioDone: boolean = false;
7071
private controller: TabsController = new TabsController();
7172
private cancelled: boolean = false;
@@ -135,6 +136,7 @@ struct Index {
135136
this.info = 'Model initialized!\nPlease enter text and press start.';
136137
this.sampleRate = e.data['sampleRate'] as number;
137138
this.numSpeakers = e.data['numSpeakers'] as number;
139+
this.numThreads = e.data['numThreads'] as number;
138140

139141
this.initTtsDone = true;
140142
}
@@ -177,6 +179,7 @@ struct Index {
177179
this.info = `Audio duration: ${audioDuration} s
178180
Elapsed: ${elapsedSeconds} s
179181
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
182+
Number of threads: ${this.numThreads}
180183
`;
181184
if (this.cancelled) {
182185
this.info += '\nCancelled.';

harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets

+67-33
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@oho
22

33
import { fileIo as fs } from '@kit.CoreFileKit';
44

5-
import {OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput} from 'sherpa_onnx';
5+
import { OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput } from 'sherpa_onnx';
66
import { buffer } from '@kit.ArkTS';
77

88
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
@@ -42,18 +42,22 @@ function copyRawFileDirToSandbox(context: Context, srcDir: string) {
4242
}
4343
}
4444

45-
function copyRawFileToSandbox(context: Context, src: string, dst: string) {
46-
// see https://blog.csdn.net/weixin_44640245/article/details/142634846
47-
// https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
45+
function copyRawFileToSandbox(context: Context, src: string,
46+
dst: string) {
47+
/* see
48+
https://blog.csdn.net/weixin_44640245/article/details/142634846
49+
https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
50+
*/
4851
let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
4952

5053
// https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
5154
let sandboxPath: string = context.getApplicationContext().filesDir;
52-
let filepath = sandboxPath + '/' + dst;
55+
let filepath = sandboxPath + '/' + dst;
5356

5457
if (fs.accessSync(filepath)) {
55-
// if the destination exists and has the expected file size,
56-
// then we skip copying it
58+
/* if the destination exists and has the expected file size,
59+
then we skip copying it
60+
*/
5761
let stat = fs.statSync(filepath);
5862
if (stat.size == uint8Array.length) {
5963
return;
@@ -66,11 +70,12 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
6670
}
6771

6872
function initTts(context: Context): OfflineTts {
69-
// Such a design is to make it easier to build flutter APPs with
70-
// github actions for a variety of tts models
71-
//
72-
// See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
73-
// for details
73+
/* Such a design is to make it easier to build flutter APPs with
74+
github actions for a variety of tts models
75+
76+
See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
77+
for details
78+
*/
7479

7580
let modelDir = '';
7681

@@ -83,13 +88,19 @@ function initTts(context: Context): OfflineTts {
8388
let vocoder = '';
8489
// for Matcha end
8590

91+
// for Kokoro begin
92+
let voices = '';
93+
// for Kokoro end
94+
8695
let ruleFsts = '';
8796
let ruleFars = '';
8897
let lexicon = '';
8998
let dataDir = '';
9099
let dictDir = '';
91-
// You can select an example below and change it according to match your
92-
// selected tts model
100+
/*
101+
You can select an example below and change it to match your
102+
selected tts model
103+
*/
93104

94105
// ============================================================
95106
// Your change starts here
@@ -146,19 +157,26 @@ function initTts(context: Context): OfflineTts {
146157
// Example 8
147158
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
148159
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
149-
// modelDir = 'matcha-icefall-zh-baker'
150-
// acousticModelName = 'model-steps-3.onnx'
151-
// vocoder = 'hifigan_v2.onnx'
152-
// lexicon = 'lexicon.txt'
160+
// modelDir = 'matcha-icefall-zh-baker';
161+
// acousticModelName = 'model-steps-3.onnx';
162+
// vocoder = 'hifigan_v2.onnx';
163+
// lexicon = 'lexicon.txt';
153164
// dictDir = 'dict';
154165
// ruleFsts = `date.fst,phone.fst,number.fst`;
155166

156167
// Example 9
157168
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
158169
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
159-
// modelDir = 'matcha-icefall-en_US-ljspeech'
160-
// acousticModelName = 'model-steps-3.onnx'
161-
// vocoder = 'hifigan_v2.onnx'
170+
// modelDir = 'matcha-icefall-en_US-ljspeech';
171+
// acousticModelName = 'model-steps-3.onnx';
172+
// vocoder = 'hifigan_v2.onnx';
173+
// dataDir = 'espeak-ng-data';
174+
175+
// Example 10
176+
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
177+
// modelDir = 'kokoro-en-v0_19';
178+
// modelName = 'model.onnx';
179+
// voices = 'voices.bin';
162180
// dataDir = 'espeak-ng-data';
163181

164182
// ============================================================
@@ -185,6 +203,10 @@ function initTts(context: Context): OfflineTts {
185203
acousticModelName = modelDir + '/' + acousticModelName;
186204
}
187205

206+
if (voices != '') {
207+
voices = modelDir + '/' + voices;
208+
}
209+
188210
if (ruleFsts != '') {
189211
let fsts = ruleFsts.split(',')
190212
let tmp: string[] = [];
@@ -210,19 +232,24 @@ function initTts(context: Context): OfflineTts {
210232
if (dataDir != '') {
211233
copyRawFileDirToSandbox(context, modelDir + '/' + dataDir)
212234
let sandboxPath: string = context.getApplicationContext().filesDir;
213-
dataDir = sandboxPath + '/' + modelDir + '/' + dataDir;
235+
dataDir = sandboxPath + '/' + modelDir + '/' + dataDir;
214236
}
215237

216238
if (dictDir != '') {
217239
copyRawFileDirToSandbox(context, modelDir + '/' + dictDir)
218240
let sandboxPath: string = context.getApplicationContext().filesDir;
219-
dictDir = sandboxPath + '/' + modelDir + '/' + dictDir;
241+
dictDir = sandboxPath + '/' + modelDir + '/' + dictDir;
220242
}
221243

222244
const tokens = modelDir + '/tokens.txt';
223245

224246
const config: OfflineTtsConfig = new OfflineTtsConfig();
225-
config.model.vits.model = modelName;
247+
if (voices != '') {
248+
config.model.vits.model = '';
249+
} else {
250+
config.model.vits.model = modelName;
251+
}
252+
226253
config.model.vits.lexicon = lexicon;
227254
config.model.vits.tokens = tokens;
228255
config.model.vits.dataDir = dataDir;
@@ -235,6 +262,15 @@ function initTts(context: Context): OfflineTts {
235262
config.model.matcha.dataDir = dataDir;
236263
config.model.matcha.dictDir = dictDir;
237264

265+
if (voices != '') {
266+
config.model.kokoro.model = modelName;
267+
} else {
268+
config.model.kokoro.model = '';
269+
}
270+
config.model.kokoro.voices = voices;
271+
config.model.kokoro.tokens = tokens;
272+
config.model.kokoro.dataDir = dataDir;
273+
238274
config.model.numThreads = 2;
239275
config.model.debug = true;
240276
config.ruleFsts = ruleFsts;
@@ -250,14 +286,12 @@ interface TtsCallbackData {
250286

251287
function callback(data: TtsCallbackData): number {
252288
workerPort.postMessage({
253-
'msgType': 'tts-generate-partial',
254-
samples: Float32Array.from(data.samples),
255-
progress: data.progress,
289+
'msgType': 'tts-generate-partial', samples: Float32Array.from(data.samples), progress: data.progress,
256290
});
257291

258292
// 0 means to stop generating in C++
259293
// 1 means to continue generating in C++
260-
return cancelled? 0 : 1;
294+
return cancelled ? 0 : 1;
261295
}
262296

263297
/**
@@ -272,9 +306,11 @@ workerPort.onmessage = (e: MessageEvents) => {
272306
if (msgType == 'init-tts' && !tts) {
273307
const context = e.data['context'] as Context;
274308
tts = initTts(context);
275-
workerPort.postMessage({ 'msgType': 'init-tts-done',
309+
workerPort.postMessage({
310+
'msgType': 'init-tts-done',
276311
sampleRate: tts.sampleRate,
277312
numSpeakers: tts.numSpeakers,
313+
numThreads: tts.config.model.numThreads,
278314
});
279315
}
280316

@@ -297,16 +333,14 @@ workerPort.onmessage = (e: MessageEvents) => {
297333
console.log(`sampleRate: ${ttsOutput.sampleRate}`);
298334

299335
workerPort.postMessage({
300-
'msgType': 'tts-generate-done',
301-
samples: Float32Array.from(ttsOutput.samples),
336+
'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
302337
});
303338

304339
});
305340
} else {
306341
const ttsOutput: TtsOutput = tts.generate(input);
307342
workerPort.postMessage({
308-
'msgType': 'tts-generate-done',
309-
samples: Float32Array.from(ttsOutput.samples),
343+
'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
310344
});
311345
}
312346

0 commit comments

Comments
 (0)