Skip to content

Commit 032ea2e

Browse files
committed
feat: 优化字节跳动声音克隆功能
- 修复音频文件上传和处理逻辑 - 完善声音训练状态检查机制 - 增加音频文件格式和大小验证 - 优化HTTP请求处理和错误处理 - 添加音色列表加载和选择功能 - 支持多语言声音克隆配置 - 改进训练状态展示和音频预览
1 parent 8cb0285 commit 032ea2e

File tree

5 files changed

+467
-37
lines changed

5 files changed

+467
-37
lines changed

src-tauri/src/commands.rs

+28
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,31 @@ pub async fn check_server_status(
144144
error_msg
145145
})
146146
}
147+
148+
// 添加新的命令处理函数
149+
#[tauri::command]
150+
pub async fn proxy_request_with_headers(
151+
_window: tauri::Window,
152+
target_url: String,
153+
method: String,
154+
headers: std::collections::HashMap<String, String>,
155+
body: Vec<u8>
156+
) -> Result<String, String> {
157+
let function_name = "proxy_request_with_headers";
158+
log_message(
159+
format!("[{}] Received request for URL: {}", function_name, target_url),
160+
"INFO".to_string(),
161+
MODEL_NAME.to_string(),
162+
);
163+
164+
HTTP_CLIENT.send_request_with_headers(&target_url, &method, headers, body).await
165+
.map_err(|e| {
166+
let error_msg = format!("[{}] Request failed: {}", function_name, e);
167+
log_message(
168+
error_msg.clone(),
169+
"ERROR".to_string(),
170+
MODEL_NAME.to_string(),
171+
);
172+
error_msg
173+
})
174+
}

src-tauri/src/http_client.rs

+79
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,83 @@ impl HttpClient {
123123
// 返回状态码是否为200
124124
Ok(response.status().as_u16() == 200)
125125
}
126+
127+
pub async fn proxy_request_with_headers(
128+
&self,
129+
target_url: &str,
130+
method: &str,
131+
headers: std::collections::HashMap<String, String>,
132+
body: Vec<u8>,
133+
) -> Result<Response> {
134+
log_message(
135+
format!("Proxying {} request to {} with headers", method, target_url),
136+
"DEBUG".to_string(),
137+
MODEL_NAME.to_string(),
138+
);
139+
140+
// 根据HTTP方法构建请求
141+
let mut request_builder = match method {
142+
"GET" => self.client.get(target_url),
143+
"POST" => self.client.post(target_url),
144+
_ => {
145+
log_message(
146+
format!("Unsupported HTTP method: {}", method),
147+
"ERROR".to_string(),
148+
MODEL_NAME.to_string(),
149+
);
150+
return Err(anyhow::anyhow!("Unsupported HTTP method"));
151+
}
152+
};
153+
154+
// 添加headers
155+
for (key, value) in headers {
156+
// 在日志记录前先克隆值
157+
let key_clone = key.clone();
158+
let value_clone = value.clone();
159+
160+
log_message(
161+
format!("Adding header: {} = {}", key_clone, value_clone),
162+
"DEBUG".to_string(),
163+
MODEL_NAME.to_string(),
164+
);
165+
166+
request_builder = request_builder.header(key, value);
167+
}
168+
169+
// 添加body并发送请求
170+
let response = request_builder.body(body).send().await?;
171+
172+
log_message(
173+
format!(
174+
"Received response: Status={}, Content-Length={:?}",
175+
response.status(),
176+
response.headers().get("content-length")
177+
),
178+
"INFO".to_string(),
179+
MODEL_NAME.to_string(),
180+
);
181+
182+
Ok(response)
183+
}
184+
185+
pub async fn send_request_with_headers(
186+
&self,
187+
target_url: &str,
188+
method: &str,
189+
headers: std::collections::HashMap<String, String>,
190+
body: Vec<u8>
191+
) -> Result<String, String> {
192+
let function_name = "send_request_with_headers";
193+
log_message(
194+
format!("[{}] Sending {} request to {}", function_name, method, target_url),
195+
"DEBUG".to_string(),
196+
MODEL_NAME.to_string(),
197+
);
198+
199+
let response = self.proxy_request_with_headers(target_url, method, headers, body).await
200+
.map_err(|e| format!("Request failed: {}", e))?;
201+
202+
response.text().await
203+
.map_err(|e| format!("Failed to parse response: {}", e))
204+
}
126205
}

src-tauri/src/main.rs

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod http_client;
1212
use crate::logger::setup_logging;
1313
use crate::device_manager::DeviceManager;
1414
use std::sync::Arc;
15+
use tauri::Manager;
1516

1617
#[tokio::main]
1718
async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -42,6 +43,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
4243
commands::clear_logs,
4344
commands::get_serial_ports,
4445
commands::proxy_request,
46+
commands::proxy_request_with_headers,
4547
commands::check_server_status,
4648
])
4749
.run(tauri::generate_context!())

src/components/ByteDanceRecorder.tsx

+184-12
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
import React, { useState, useRef } from 'react';
1+
import React, { useState, useRef, useEffect } from 'react';
22
import { Button } from "@/components/ui/button";
33
import { Progress } from "@/components/ui/progress";
44
import { Label } from "@/components/ui/label";
55
import { Switch } from "@/components/ui/switch";
66
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
77
import { Textarea } from "@/components/ui/textarea";
8-
import { uploadAudioFile } from '../lib/bytedanceTts';
8+
import { uploadAudioFile, checkVoiceStatus, VoiceStatus, VoiceStatusResponse, VoiceInfo, listAvailableVoices } from '../lib/bytedanceTts';
99
import { logger } from '../utils/logger';
10+
import { Alert, AlertDescription } from "@/components/ui/alert";
1011

1112
const ModelName = "ByteDanceRecorder";
1213

@@ -29,6 +30,14 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
2930
const [isUploading, setIsUploading] = useState(false);
3031
const [uploadProgress, setUploadProgress] = useState(0);
3132
const fileInputRef = useRef<HTMLInputElement>(null);
33+
const [currentVoiceId, setCurrentVoiceId] = useState<string | null>(null);
34+
const [trainingStatus, setTrainingStatus] = useState<VoiceStatusResponse | null>(null);
35+
const [isCheckingStatus, setIsCheckingStatus] = useState(false);
36+
const audioRef = useRef<HTMLAudioElement>(null);
37+
const [availableVoices, setAvailableVoices] = useState<VoiceInfo[]>([]);
38+
const [selectedVoiceId, setSelectedVoiceId] = useState<string>('');
39+
const [isLoadingVoices, setIsLoadingVoices] = useState(false);
40+
const [loadVoicesError, setLoadVoicesError] = useState<string | null>(null);
3241

3342
// 配置选项
3443
const [language, setLanguage] = useState<Language>(0);
@@ -37,6 +46,75 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
3746
const [noiseReduction, setNoiseReduction] = useState(true);
3847
const [volumeNormalization, setVolumeNormalization] = useState(true);
3948

49+
// Load the list of available voices once, when the component mounts.
useEffect(() => {
  // Flipped by the cleanup below so that a request which resolves after the
  // component unmounted (or after React re-ran this effect) neither updates
  // state nor pops up an alert.
  let cancelled = false;

  async function loadVoices() {
    try {
      setIsLoadingVoices(true);
      setLoadVoicesError(null);
      logger.log('Starting to load available voices...', 'INFO', ModelName);

      const voices = await listAvailableVoices();
      if (cancelled) return;
      logger.log(`Successfully loaded ${voices.length} voices`, 'INFO', ModelName);

      setAvailableVoices(voices);

      // Default to the first voice when at least one is available.
      if (voices.length > 0) {
        setSelectedVoiceId(voices[0].speakerId);
        logger.log(`Selected default voice: ${voices[0].speakerId}`, 'INFO', ModelName);
      } else {
        logger.log('No voices available', 'WARN', ModelName);
      }
    } catch (error) {
      if (cancelled) return;
      const errorMessage = error instanceof Error ? error.message : '未知错误';
      logger.log(`Failed to load voices: ${errorMessage}`, 'ERROR', ModelName);
      setLoadVoicesError(errorMessage);
      alert(`加载音色列表失败: ${errorMessage}`);
    } finally {
      // Runs even after the early returns above, hence the guard.
      if (!cancelled) {
        setIsLoadingVoices(false);
      }
    }
  }

  loadVoices();

  return () => {
    cancelled = true;
  };
}, []);
81+
82+
// Poll the training status of the current voice every 5 seconds while it is
// still in progress (Training) or not yet visible to the service (NotFound).
useEffect(() => {
  const stillPending =
    trainingStatus?.status === VoiceStatus.Training ||
    trainingStatus?.status === VoiceStatus.NotFound;

  // Nothing to poll: no voice yet, or the status is already terminal. Bailing
  // out here avoids leaving an interval ticking that would never do any work.
  if (!currentVoiceId || !stillPending) {
    return;
  }

  // Capture the id this effect run is responsible for.
  const voiceId = currentVoiceId;
  // Flipped by the cleanup so a late response for a previous voice id (or
  // after unmount) cannot overwrite state or report a clone as finished.
  let cancelled = false;

  const checkStatus = async () => {
    try {
      setIsCheckingStatus(true);
      const status = await checkVoiceStatus(voiceId);
      if (cancelled) return;
      setTrainingStatus(status);

      if (status.status === VoiceStatus.Success || status.status === VoiceStatus.Active) {
        // NOTE(review): onVoiceCloned is intentionally not a dependency; adding
        // it would restart polling whenever the parent passes a new callback
        // identity. Confirm the parent's callback does not need to be fresh.
        onVoiceCloned(voiceId);
      }
    } catch (error) {
      logger.log(`Failed to check voice status: ${error}`, 'ERROR', ModelName);
    } finally {
      setIsCheckingStatus(false);
    }
  };

  // Check immediately, then keep polling.
  checkStatus();
  // ReturnType<typeof setInterval> is correct for both browser and Node typings.
  const intervalId: ReturnType<typeof setInterval> = setInterval(checkStatus, 5000);

  return () => {
    cancelled = true;
    clearInterval(intervalId);
  };
}, [currentVoiceId, trainingStatus?.status]);
117+
40118
const handleFileSelect = async (event: React.ChangeEvent<HTMLInputElement>) => {
41119
const file = event.target.files?.[0];
42120
if (!file) return;
@@ -58,24 +136,18 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
58136
setIsUploading(true);
59137
logger.log(`Starting file upload: ${file.name}`, 'INFO', ModelName);
60138

61-
// 模拟上传进度
62-
const progressInterval = setInterval(() => {
63-
setUploadProgress(prev => Math.min(prev + 10, 90));
64-
}, 500);
65-
66-
const voiceId = await uploadAudioFile(file, {
139+
const voiceId = await uploadAudioFile(file, selectedVoiceId, {
67140
language,
68141
modelType,
69142
textValidation,
70143
noiseReduction,
71144
volumeNormalization
72145
});
73146

74-
clearInterval(progressInterval);
75-
setUploadProgress(100);
147+
setCurrentVoiceId(voiceId);
148+
setTrainingStatus({ status: VoiceStatus.Training });
76149

77150
logger.log(`File uploaded successfully, voiceId: ${voiceId}`, 'INFO', ModelName);
78-
onVoiceCloned(voiceId);
79151

80152
} catch (error) {
81153
logger.log(`File upload failed: ${error}`, 'ERROR', ModelName);
@@ -89,9 +161,103 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
89161
}
90162
};
91163

164+
// Renders the training-status alert for the current voice, or nothing while
// no status has been recorded yet.
const getStatusDisplay = () => {
  if (!trainingStatus) return null;

  const { status, version, createTime, demoAudio } = trainingStatus;

  // User-facing label for each status.
  const labelByStatus = {
    [VoiceStatus.NotFound]: '未找到音色',
    [VoiceStatus.Training]: '正在训练中...',
    [VoiceStatus.Success]: '训练成功',
    [VoiceStatus.Failed]: '训练失败',
    [VoiceStatus.Active]: '音色可用'
  };

  // Background utility class for each status.
  const backgroundByStatus = {
    [VoiceStatus.NotFound]: 'bg-gray-100',
    [VoiceStatus.Training]: 'bg-yellow-100',
    [VoiceStatus.Success]: 'bg-green-100',
    [VoiceStatus.Failed]: 'bg-red-100',
    [VoiceStatus.Active]: 'bg-green-100'
  };

  // Creation time and demo audio are only shown once the voice is usable.
  const canUseVoice = status === VoiceStatus.Success || status === VoiceStatus.Active;

  return (
    <Alert className={backgroundByStatus[status]}>
      <AlertDescription className="space-y-2">
        <div className="flex items-center justify-between">
          <span>{labelByStatus[status]}</span>
          {version && (
            <span className="text-sm text-muted-foreground">版本: {version}</span>
          )}
        </div>

        {canUseVoice && (
          <>
            {createTime && (
              <div className="text-sm text-muted-foreground">
                创建时间: {new Date(createTime).toLocaleString()}
              </div>
            )}
            {demoAudio && (
              <div className="mt-2">
                <Label>试听效果</Label>
                <audio
                  ref={audioRef}
                  src={demoAudio}
                  controls
                  className="w-full mt-1"
                />
                <div className="text-sm text-muted-foreground mt-1">
                  试听音频链接有效期为1小时
                </div>
              </div>
            )}
          </>
        )}
      </AlertDescription>
    </Alert>
  );
};
223+
92224
return (
93225
<div className="space-y-6">
94226
<div className="space-y-4">
227+
<div className="space-y-2">
228+
<Label>选择音色</Label>
229+
{isLoadingVoices ? (
230+
<div className="text-sm text-muted-foreground">加载音色列表中...</div>
231+
) : loadVoicesError ? (
232+
<div className="text-sm text-red-500">
233+
加载失败: {loadVoicesError}
234+
</div>
235+
) : (
236+
<Select
237+
value={selectedVoiceId}
238+
onValueChange={setSelectedVoiceId}
239+
>
240+
<SelectTrigger>
241+
<SelectValue placeholder="选择要使用的音色" />
242+
</SelectTrigger>
243+
<SelectContent>
244+
{availableVoices.map(voice => (
245+
<SelectItem
246+
key={voice.speakerId}
247+
value={voice.speakerId}
248+
>
249+
音色 {voice.speakerId}
250+
{voice.version && ` (${voice.version})`}
251+
</SelectItem>
252+
))}
253+
</SelectContent>
254+
</Select>
255+
)}
256+
<div className="text-sm text-muted-foreground">
257+
选择要使用的音色ID,每个音色有其特定的声音特征
258+
</div>
259+
</div>
260+
95261
<input
96262
type="file"
97263
ref={fileInputRef}
@@ -101,7 +267,7 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
101267
/>
102268
<Button
103269
onClick={() => fileInputRef.current?.click()}
104-
disabled={isUploading}
270+
disabled={isUploading || !selectedVoiceId}
105271
variant="outline"
106272
className="w-full"
107273
>
@@ -188,6 +354,8 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
188354
</div>
189355
</div>
190356

357+
{getStatusDisplay()}
358+
191359
<div className="text-sm text-muted-foreground">
192360
注意:
193361
<ul className="list-disc pl-4 space-y-1">
@@ -196,6 +364,10 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
196364
<li>支持mp3、wav、ogg、m4a、aac格式</li>
197365
<li>复刻的音色将在7天内未使用时自动删除</li>
198366
<li>使用2.0版本时,请确保音频语言与选择的语言一致</li>
367+
<li>训练完成后可以试听效果</li>
368+
<li>训练成功后音色ID将自动保存</li>
369+
<li>每个音色ID代表一种独特的声音特征</li>
370+
<li>请先选择音色ID再上传音频文件</li>
199371
</ul>
200372
</div>
201373
</div>

0 commit comments

Comments
 (0)