Skip to content

웹소켓 통신 구현완료(accuracy 부족) #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
174 changes: 116 additions & 58 deletions Sources/ViewModels/Audio/AudioStream.swift
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
// AudioStream.swift
// Lecture2Quiz
//
// Created by 바견규 on 4/27/25.
//

import AVFoundation

class AudioStreamer {
Expand All @@ -6,48 +12,48 @@ class AudioStreamer {
private var inputFormat: AVAudioFormat?
private var isPaused: Bool = false
private var audioWebSocket: AudioWebSocket?
private var converter: AVAudioConverter?
private var partialBuffer = Data() // 🔄 남은 청크 보관

// WhisperLive 설정에 맞춘 포맷
private var bufferSize: AVAudioFrameCount = 4096
private var bufferSize: AVAudioFrameCount = 1600 // 100ms 기준
private var sampleRate: Double = 16000
private var channels: UInt32 = 1

// 🔄 리샘플링을 위한 오디오 컨버터
private var converter: AVAudioConverter?

/// Creates a streamer that captures microphone audio and forwards it over `webSocket`.
/// Sets up an `AVAudioConverter` from the input node's hardware format to the
/// 16 kHz / mono / interleaved Int16 PCM format expected by WhisperLive.
/// - Parameter webSocket: Transport used later to send converted PCM chunks.
init(webSocket: AudioWebSocket) {
    self.inputNode = engine.inputNode
    self.audioWebSocket = webSocket

    // Hardware capture format as reported by the engine's input node.
    let inputFormat = inputNode.outputFormat(forBus: 0)
    print("🔍 입력 포맷: \(inputFormat)")

    // Target format for WhisperLive: 16 kHz, mono, interleaved 16-bit PCM.
    // The AVAudioFormat initializer is failable; avoid force-unwrapping so an
    // unsupported format combination cannot crash at init time. On failure the
    // converter stays nil, which downstream code already guards against.
    guard let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                           sampleRate: 16000,
                                           channels: 1,
                                           interleaved: true) else {
        print("❌ 오디오 컨버터 생성 실패")
        return
    }

    // Converter creation can also fail (e.g. incompatible formats); it is
    // stored as an optional and nil-checked before use in processAudioBuffer.
    self.converter = AVAudioConverter(from: inputFormat, to: outputFormat)
    self.inputFormat = outputFormat
}

// MARK: - 오디오 세션 설정
func configureAudioSession() {
let session = AVAudioSession.sharedInstance()
do {
try session.setCategory(.playAndRecord, mode: .voiceChat, options: [.allowBluetooth, .defaultToSpeaker])
try session.setActive(true)

// 🔍 사용 가능한 오디오 입력 디바이스 탐색
if let availableInputs = session.availableInputs {
for input in availableInputs {
print("🔎 입력 디바이스 발견: \(input.portType.rawValue)")
if input.portType == .bluetoothHFP || input.portType == .bluetoothLE {
try session.setPreferredInput(input)
print("🎧 에어팟이 입력 디바이스로 설정되었습니다.")
}
}
}

// ✅ 실제 하드웨어 포맷 가져오기
let inputSampleRate = session.sampleRate
let inputChannels = UInt32(session.inputNumberOfChannels)
print("🎙️ 설정된 샘플레이트: \(inputSampleRate)")
print("🎙️ 설정된 채널 수: \(inputChannels)")

// ✅ 샘플레이트를 16000으로 변환하도록 설정
let inputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: inputSampleRate, channels: inputChannels, interleaved: false)!
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: inputChannels, interleaved: false)!

converter = AVAudioConverter(from: inputFormat, to: outputFormat)

try session.setCategory(.playAndRecord, mode: .default, options: [.allowBluetooth, .defaultToSpeaker])
try session.setPreferredSampleRate(48000)
try session.setPreferredInputNumberOfChannels(1) // Mono로 강제 설정
try session.setMode(.measurement)
try session.setActive(true, options: .notifyOthersOnDeactivation)
sampleRate = session.sampleRate
channels = UInt32(session.inputNumberOfChannels)
print("🎙️ 설정된 샘플레이트: \(sampleRate)")
print("🎙️ 설정된 채널 수: \(channels)")
} catch {
print("🔴 오디오 세션 설정 실패: \(error.localizedDescription)")
}
Expand All @@ -57,13 +63,19 @@ class AudioStreamer {
func startStreaming() {
configureAudioSession()

let format = inputNode.outputFormat(forBus: 0)
self.inputFormat = format

let format = AVAudioFormat(commonFormat: .pcmFormatFloat32,
sampleRate: 48000,
channels: channels,
interleaved: true)

self.inputFormat = format
guard let hardwareFormat = format else {
print("⚠️ 오디오 포맷 생성 실패")
return
}

self.inputFormat = hardwareFormat

inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: format) { [weak self] buffer, _ in
inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: hardwareFormat) { [weak self] buffer, _ in
self?.processAudioBuffer(buffer)
}

Expand All @@ -75,58 +87,104 @@ class AudioStreamer {
}
}

// MARK: - 오디오 버퍼를 WebSocket으로 서버로 전송
// MARK: - 오디오 버퍼를 WebSocket으로 전송
func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
guard let converter = converter else { return }
guard let converter = self.converter else {
print("❌ 오디오 컨버터 생성 실패")
return
}

let outputBuffer = AVAudioPCMBuffer(pcmFormat: converter.outputFormat, frameCapacity: buffer.frameCapacity)!
var error: NSError?
// 🔍 **RMS 계산**
if let floatChannelData = buffer.floatChannelData {
let frameLength = Int(buffer.frameLength)
let channelDataValue = Array(UnsafeBufferPointer(start: floatChannelData.pointee, count: frameLength))

// 🔄 RMS 계산
let rms = sqrt(channelDataValue.map { $0 * $0 }.reduce(0, +) / Float(frameLength))
print("🔊 오디오 RMS 값: \(rms)")

// 🔍 너무 작으면 경고 로그 출력
if rms < 0.01 {
print("⚠️ 볼륨이 너무 작습니다.")
}
}

let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
sampleRate: 16000,
channels: 1,
interleaved: true)!

guard let newBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: 1600) else {
print("❌ PCM Buffer 생성 실패")
return
}

let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
outStatus.pointee = .haveData
return buffer
}

converter.convert(to: outputBuffer, error: &error, withInputFrom: inputBlock)
var error: NSError?
converter.convert(to: newBuffer, error: &error, withInputFrom: inputBlock)

if let error = error {
print("🔴 변환 중 에러: \(error.localizedDescription)")
print("❌ 오디오 변환 실패: \(error.localizedDescription)")
return
}

if let audioData = convertBufferTo16BitPCM(outputBuffer) {
print("🔄 PCM 데이터 전송 중...")
audioWebSocket?.sendDataToServer(audioData)
} else {
print("Error: Audio buffer 변환 실패")
print("📝 변환된 Buffer Frame Length: \(newBuffer.frameLength), Sample Rate: \(newBuffer.format.sampleRate)")

if let audioData = convertToFloat32BytesLikePython(newBuffer) {
var completeData = partialBuffer + audioData
let chunkSize = 4096

while completeData.count >= chunkSize {
let chunk = completeData.prefix(chunkSize)
audioWebSocket?.sendDataToServer(chunk)
print("🔄 오디오 데이터 전송 성공: 4096 바이트")
completeData.removeFirst(chunkSize)
}

partialBuffer = completeData
}
}

// MARK: - 32bit float PCM -> 16bit int PCM 변환
func convertBufferTo16BitPCM(_ buffer: AVAudioPCMBuffer) -> Data? {
guard let floatChannelData = buffer.floatChannelData else {
print("floatChannelData is nil")


// MARK: - Python의 bytes_to_float_array 메소드와 유사하게 변환
func convertToFloat32BytesLikePython(_ buffer: AVAudioPCMBuffer) -> Data? {
guard let int16ChannelData = buffer.int16ChannelData else {
print("int16ChannelData is nil")
return nil
}

let channelPointer = floatChannelData.pointee
let frameLength = Int(buffer.frameLength)
var pcmData = Data(capacity: frameLength * MemoryLayout<Int16>.size)

let channelPointer = int16ChannelData.pointee

// Float32 배열 생성
var floatArray = [Float32](repeating: 0, count: frameLength)

// Int16 -> Float32 정규화 (Python과 동일한 방식)
for i in 0..<frameLength {
let sample = max(-1.0, min(1.0, channelPointer[i])) // 클리핑 처리
var intSample = Int16(sample * Float(Int16.max))
pcmData.append(Data(bytes: &intSample, count: MemoryLayout<Int16>.size))
}
let int16Value = channelPointer[i]
// Python의 정규화 방식: value.astype(np.float32) / 32768.0
floatArray[i] = Float32(Int16(littleEndian: int16Value)) / 32768.0

return pcmData
}

// Float32 배열을 바이트로 변환 (Python의 tobytes()와 동일)
let floatData = Data(bytes: floatArray, count: frameLength * MemoryLayout<Float32>.size)

print("🔄 Python 스타일로 Float32로 변환 완료 - \(floatData.count) bytes")
return floatData
}

// MARK: - Pause audio streaming
/// Temporarily stops delivering microphone buffers by removing the input tap.
/// Safe to call repeatedly: a second call while already paused is a no-op.
func pauseStreaming() {
    // Nothing to do when already paused.
    if isPaused { return }
    inputNode.removeTap(onBus: 0)
    isPaused = true
    print("⏸️ 오디오 스트리밍 일시 정지됨")
}

// MARK: - 오디오 스트리밍 재개
Expand All @@ -141,6 +199,7 @@ class AudioStreamer {
self?.processAudioBuffer(buffer)
}
isPaused = false
print("▶️ 오디오 스트리밍 재개됨")
}

// MARK: - 오디오 스트리밍 중지
Expand All @@ -150,4 +209,3 @@ class AudioStreamer {
print("🛑 AVAudioEngine 중지됨")
}
}

Loading