feat: 优化字节跳动声音克隆功能

iaiuse · iaiuse · commit 032ea2e52276 · 2025-01-18T02:10:53.000+08:00
- 修复音频文件上传和处理逻辑
- 完善声音训练状态检查机制
- 增加音频文件格式和大小验证
- 优化HTTP请求处理和错误处理
- 添加音色列表加载和选择功能
- 支持多语言声音克隆配置
- 改进训练状态展示和音频预览
diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
@@ -144,3 +144,31 @@ pub async fn check_server_status(
             error_msg
         })
 }
+
+/// Tauri command that forwards a request, with caller-supplied headers and
+/// body, through the shared `HTTP_CLIENT` and yields the response body text.
+/// A failure is logged at ERROR level and returned prefixed with this name.
+#[tauri::command]
+pub async fn proxy_request_with_headers(
+    _window: tauri::Window,
+    target_url: String,
+    method: String,
+    headers: std::collections::HashMap<String, String>,
+    body: Vec<u8>,
+) -> Result<String, String> {
+    let command = "proxy_request_with_headers";
+    let received = format!("[{}] Received request for URL: {}", command, target_url);
+    log_message(received, "INFO".to_string(), MODEL_NAME.to_string());
+
+    let outcome = HTTP_CLIENT
+        .send_request_with_headers(&target_url, &method, headers, body)
+        .await;
+    match outcome {
+        Ok(text) => Ok(text),
+        Err(cause) => {
+            let failure = format!("[{}] Request failed: {}", command, cause);
+            log_message(failure.clone(), "ERROR".to_string(), MODEL_NAME.to_string());
+            Err(failure)
+        }
+    }
+}
diff --git a/src-tauri/src/http_client.rs b/src-tauri/src/http_client.rs
@@ -123,4 +123,83 @@ impl HttpClient {
         // 返回状态码是否为200
         Ok(response.status().as_u16() == 200)
     }
+
+    /// Sends a `GET` or `POST` request to `target_url` with the given headers
+    /// and raw body, returning the response as-is (its status is only logged).
+    /// Errors if `method` is not exactly `"GET"` or `"POST"`, or if sending
+    /// the request fails.
+    pub async fn proxy_request_with_headers(
+        &self,
+        target_url: &str,
+        method: &str,
+        headers: std::collections::HashMap<String, String>,
+        body: Vec<u8>,
+    ) -> Result<Response> {
+        log_message(
+            format!("Proxying {} request to {} with headers", method, target_url),
+            "DEBUG".to_string(),
+            MODEL_NAME.to_string(),
+        );
+
+        // Pick the request builder that matches the HTTP method.
+        let mut request_builder = match method {
+            "GET" => self.client.get(target_url),
+            "POST" => self.client.post(target_url),
+            _ => {
+                log_message(
+                    format!("Unsupported HTTP method: {}", method),
+                    "ERROR".to_string(),
+                    MODEL_NAME.to_string(),
+                );
+                return Err(anyhow::anyhow!("Unsupported HTTP method"));
+            }
+        };
+
+        // Attach the headers. Only the header name is logged: values may carry
+        // credentials (e.g. `Authorization` tokens) that must stay out of logs.
+        for (key, value) in headers {
+            log_message(
+                format!("Adding header: {}", key),
+                "DEBUG".to_string(),
+                MODEL_NAME.to_string(),
+            );
+            request_builder = request_builder.header(key, value);
+        }
+
+        // Attach the body and send the request.
+        let response = request_builder.body(body).send().await?;
+
+        log_message(
+            format!(
+                "Received response: Status={}, Content-Length={:?}",
+                response.status(),
+                response.headers().get("content-length")
+            ),
+            "INFO".to_string(),
+            MODEL_NAME.to_string(),
+        );
+
+        Ok(response)
+    }
+
+    /// Performs the request via `proxy_request_with_headers` and returns the
+    /// response body as text; any error is flattened into a message string.
+    pub async fn send_request_with_headers(
+        &self,
+        target_url: &str,
+        method: &str,
+        headers: std::collections::HashMap<String, String>,
+        body: Vec<u8>,
+    ) -> Result<String, String> {
+        let caller = "send_request_with_headers";
+        let announcement = format!("[{}] Sending {} request to {}", caller, method, target_url);
+        log_message(announcement, "DEBUG".to_string(), MODEL_NAME.to_string());
+        match self.proxy_request_with_headers(target_url, method, headers, body).await {
+            Err(cause) => Err(format!("Request failed: {}", cause)),
+            Ok(reply) => match reply.text().await {
+                Ok(text) => Ok(text),
+                Err(cause) => Err(format!("Failed to parse response: {}", cause)),
+            },
+        }
+    }
 }
diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs
@@ -12,6 +12,7 @@ mod http_client;
 use crate::logger::setup_logging;
 use crate::device_manager::DeviceManager;
 use std::sync::Arc;
+use tauri::Manager; 
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -42,6 +43,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
             commands::clear_logs,
             commands::get_serial_ports,
             commands::proxy_request,
+            commands::proxy_request_with_headers,
             commands::check_server_status,
         ])
         .run(tauri::generate_context!())
diff --git a/src/components/ByteDanceRecorder.tsx b/src/components/ByteDanceRecorder.tsx
@@ -1,12 +1,13 @@
-import React, { useState, useRef } from 'react';
+import React, { useState, useRef, useEffect } from 'react';
 import { Button } from "@/components/ui/button";
 import { Progress } from "@/components/ui/progress";
 import { Label } from "@/components/ui/label";
 import { Switch } from "@/components/ui/switch";
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
 import { Textarea } from "@/components/ui/textarea";
-import { uploadAudioFile } from '../lib/bytedanceTts';
+import { uploadAudioFile, checkVoiceStatus, VoiceStatus, VoiceStatusResponse, VoiceInfo, listAvailableVoices } from '../lib/bytedanceTts';
 import { logger } from '../utils/logger';
+import { Alert, AlertDescription } from "@/components/ui/alert";
 
 const ModelName = "ByteDanceRecorder";
 
@@ -29,6 +30,14 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
   const [isUploading, setIsUploading] = useState(false);
   const [uploadProgress, setUploadProgress] = useState(0);
   const fileInputRef = useRef<HTMLInputElement>(null);
+  const [currentVoiceId, setCurrentVoiceId] = useState<string | null>(null);
+  const [trainingStatus, setTrainingStatus] = useState<VoiceStatusResponse | null>(null);
+  const [isCheckingStatus, setIsCheckingStatus] = useState(false);
+  const audioRef = useRef<HTMLAudioElement>(null);
+  const [availableVoices, setAvailableVoices] = useState<VoiceInfo[]>([]);
+  const [selectedVoiceId, setSelectedVoiceId] = useState<string>('');
+  const [isLoadingVoices, setIsLoadingVoices] = useState(false);
+  const [loadVoicesError, setLoadVoicesError] = useState<string | null>(null);
 
   // 配置选项
   const [language, setLanguage] = useState<Language>(0);
@@ -37,6 +46,75 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
   const [noiseReduction, setNoiseReduction] = useState(true);
   const [volumeNormalization, setVolumeNormalization] = useState(true);
 
+  // On mount, fetch the list of available voices once and preselect the first.
+  useEffect(() => {
+    const fetchVoices = async () => {
+      try {
+        setIsLoadingVoices(true);
+        setLoadVoicesError(null);
+        logger.log('Starting to load available voices...', 'INFO', ModelName);
+
+        const voiceList = await listAvailableVoices();
+        logger.log(`Successfully loaded ${voiceList.length} voices`, 'INFO', ModelName);
+        setAvailableVoices(voiceList);
+
+        // Nothing to preselect when the list is empty.
+        if (voiceList.length === 0) {
+          logger.log('No voices available', 'WARN', ModelName);
+          return;
+        }
+        const defaultVoiceId = voiceList[0].speakerId;
+        setSelectedVoiceId(defaultVoiceId);
+        logger.log(`Selected default voice: ${defaultVoiceId}`, 'INFO', ModelName);
+      } catch (error) {
+        const reason = error instanceof Error ? error.message : '未知错误';
+        logger.log(`Failed to load voices: ${reason}`, 'ERROR', ModelName);
+        setLoadVoicesError(reason);
+        alert(`加载音色列表失败: ${reason}`);
+      } finally {
+        setIsLoadingVoices(false);
+      }
+    };
+
+    fetchVoices();
+  }, []);
+
+  // Poll the training status every 5 seconds, but only while the voice is
+  // still training or not yet found; no timer runs once the status is final.
+  useEffect(() => {
+    const stillPending =
+      trainingStatus?.status === VoiceStatus.Training ||
+      trainingStatus?.status === VoiceStatus.NotFound;
+    if (!currentVoiceId || !stillPending) return undefined;
+
+    // Set by the cleanup so a late response cannot act on stale state.
+    let cancelled = false;
+
+    const checkStatus = async () => {
+      try {
+        setIsCheckingStatus(true);
+        const status = await checkVoiceStatus(currentVoiceId);
+        if (cancelled) return;
+        setTrainingStatus(status);
+
+        if (status.status === VoiceStatus.Success || status.status === VoiceStatus.Active) {
+          onVoiceCloned(currentVoiceId);
+        }
+      } catch (error) {
+        logger.log(`Failed to check voice status: ${error}`, 'ERROR', ModelName);
+      } finally {
+        setIsCheckingStatus(false);
+      }
+    };
+
+    checkStatus();
+    const intervalId = setInterval(checkStatus, 5000);
+    return () => {
+      cancelled = true;
+      clearInterval(intervalId);
+    };
+  }, [currentVoiceId, trainingStatus?.status]);
+
   const handleFileSelect = async (event: React.ChangeEvent<HTMLInputElement>) => {
     const file = event.target.files?.[0];
     if (!file) return;
@@ -58,24 +136,18 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
       setIsUploading(true);
       logger.log(`Starting file upload: ${file.name}`, 'INFO', ModelName);
 
-      // 模拟上传进度
-      const progressInterval = setInterval(() => {
-        setUploadProgress(prev => Math.min(prev + 10, 90));
-      }, 500);
-
-      const voiceId = await uploadAudioFile(file, {
+      const voiceId = await uploadAudioFile(file, selectedVoiceId, {
         language,
         modelType,
         textValidation,
         noiseReduction,
         volumeNormalization
       });
       
-      clearInterval(progressInterval);
-      setUploadProgress(100);
+      setCurrentVoiceId(voiceId);
+      setTrainingStatus({ status: VoiceStatus.Training });
       
       logger.log(`File uploaded successfully, voiceId: ${voiceId}`, 'INFO', ModelName);
-      onVoiceCloned(voiceId);
 
     } catch (error) {
       logger.log(`File upload failed: ${error}`, 'ERROR', ModelName);
@@ -89,9 +161,103 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
     }
   };
 
+  // Builds the training-status banner; returns null while no status is set.
+  const getStatusDisplay = () => {
+    if (!trainingStatus) return null;
+
+    const { status, version, createTime, demoAudio } = trainingStatus;
+
+    // Banner label shown for each status.
+    const labelByStatus = {
+      [VoiceStatus.NotFound]: '未找到音色',
+      [VoiceStatus.Training]: '正在训练中...',
+      [VoiceStatus.Success]: '训练成功',
+      [VoiceStatus.Failed]: '训练失败',
+      [VoiceStatus.Active]: '音色可用'
+    };
+
+    // Background utility class applied to the banner for each status.
+    const backgroundByStatus = {
+      [VoiceStatus.NotFound]: 'bg-gray-100',
+      [VoiceStatus.Training]: 'bg-yellow-100',
+      [VoiceStatus.Success]: 'bg-green-100',
+      [VoiceStatus.Failed]: 'bg-red-100',
+      [VoiceStatus.Active]: 'bg-green-100'
+    };
+
+    // Creation time and audio preview are only shown for a usable voice.
+    const isUsable = status === VoiceStatus.Success || status === VoiceStatus.Active;
+    const usableDetails = isUsable && (
+      <>
+        {createTime && (
+          <div className="text-sm text-muted-foreground">
+            创建时间: {new Date(createTime).toLocaleString()}
+          </div>
+        )}
+        {demoAudio && (
+          <div className="mt-2">
+            <Label>试听效果</Label>
+            <audio ref={audioRef} src={demoAudio} controls className="w-full mt-1" />
+            <div className="text-sm text-muted-foreground mt-1">
+              试听音频链接有效期为1小时
+            </div>
+          </div>
+        )}
+      </>
+    );
+
+    return (
+      <Alert className={backgroundByStatus[status]}>
+        <AlertDescription className="space-y-2">
+          <div className="flex items-center justify-between">
+            <span>{labelByStatus[status]}</span>
+            {version && (
+              <span className="text-sm text-muted-foreground">版本: {version}</span>
+            )}
+          </div>
+          {usableDetails}
+        </AlertDescription>
+      </Alert>
+    );
+  };
+
   return (
     <div className="space-y-6">
       <div className="space-y-4">
+        <div className="space-y-2">
+          <Label>选择音色</Label>
+          {isLoadingVoices ? (
+            <div className="text-sm text-muted-foreground">加载音色列表中...</div>
+          ) : loadVoicesError ? (
+            <div className="text-sm text-red-500">
+              加载失败: {loadVoicesError}
+            </div>
+          ) : (
+            <Select
+              value={selectedVoiceId}
+              onValueChange={setSelectedVoiceId}
+            >
+              <SelectTrigger>
+                <SelectValue placeholder="选择要使用的音色" />
+              </SelectTrigger>
+              <SelectContent>
+                {availableVoices.map(voice => (
+                  <SelectItem 
+                    key={voice.speakerId} 
+                    value={voice.speakerId}
+                  >
+                    音色 {voice.speakerId}
+                    {voice.version && ` (${voice.version})`}
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+          )}
+          <div className="text-sm text-muted-foreground">
+            选择要使用的音色ID，每个音色有其特定的声音特征
+          </div>
+        </div>
+
         <input
           type="file"
           ref={fileInputRef}
@@ -101,7 +267,7 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
         />
         <Button
           onClick={() => fileInputRef.current?.click()}
-          disabled={isUploading}
+          disabled={isUploading || !selectedVoiceId}
           variant="outline"
           className="w-full"
         >
@@ -188,6 +354,8 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
         </div>
       </div>
 
+      {getStatusDisplay()}
+
       <div className="text-sm text-muted-foreground">
         注意：
         <ul className="list-disc pl-4 space-y-1">
@@ -196,6 +364,10 @@ export function ByteDanceRecorder({ onVoiceCloned }: ByteDanceRecorderProps) {
           <li>支持mp3、wav、ogg、m4a、aac格式</li>
           <li>复刻的音色将在7天内未使用时自动删除</li>
           <li>使用2.0版本时，请确保音频语言与选择的语言一致</li>
+          <li>训练完成后可以试听效果</li>
+          <li>训练成功后音色ID将自动保存</li>
+          <li>每个音色ID代表一种独特的声音特征</li>
+          <li>请先选择音色ID再上传音频文件</li>
         </ul>
       </div>
     </div>
diff --git a/src/lib/bytedanceTts.ts b/src/lib/bytedanceTts.ts