Skip to content

Commit 8c360ca

Browse files
바견규바견규
바견규
authored and
바견규
committed
음성 websocket 전송 기능 구현 (accuracy 부족)
1 parent af67041 commit 8c360ca

File tree

2 files changed

+223
-74
lines changed

2 files changed

+223
-74
lines changed

Sources/ViewModels/Audio/AudioStream.swift

Lines changed: 116 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
// AudioStream.swift
2+
// Lecture2Quiz
3+
//
4+
// Created by 바견규 on 4/27/25.
5+
//
6+
17
import AVFoundation
28

39
class AudioStreamer {
@@ -6,48 +12,48 @@ class AudioStreamer {
612
private var inputFormat: AVAudioFormat?
713
private var isPaused: Bool = false
814
private var audioWebSocket: AudioWebSocket?
9-
private var converter: AVAudioConverter?
15+
private var partialBuffer = Data() // 🔄 남은 청크 보관
1016

1117
// WhisperLive 설정에 맞춘 포맷
12-
private var bufferSize: AVAudioFrameCount = 4096
18+
private var bufferSize: AVAudioFrameCount = 1600 // 100ms 기준
1319
private var sampleRate: Double = 16000
1420
private var channels: UInt32 = 1
1521

22+
// 🔄 리샘플링을 위한 오디오 컨버터
23+
private var converter: AVAudioConverter?
24+
1625
/// Creates a streamer that captures from the engine's input node and sends
/// audio through `webSocket`.
///
/// Builds a converter from the input node's current output format to
/// 16 kHz, mono, interleaved Int16 PCM.
///
/// - Parameter webSocket: Socket wrapper that receives the audio data.
init(webSocket: AudioWebSocket) {
    inputNode = engine.inputNode
    audioWebSocket = webSocket

    // Format the input node currently produces on bus 0.
    let hardwareFormat = inputNode.outputFormat(forBus: 0)
    print("🔍 입력 포맷: \(hardwareFormat)")

    // Target format for resampling: 16 kHz, 1 channel, interleaved Int16.
    let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatInt16,
        sampleRate: 16000,
        channels: 1,
        interleaved: true
    )!

    // Converter used later when processing tapped buffers.
    converter = AVAudioConverter(from: hardwareFormat, to: targetFormat)
    // The stored `inputFormat` property is set to the target format here.
    inputFormat = targetFormat
}
2043

2144
// MARK: - 오디오 세션 설정
2245
func configureAudioSession() {
2346
let session = AVAudioSession.sharedInstance()
2447
do {
25-
try session.setCategory(.playAndRecord, mode: .voiceChat, options: [.allowBluetooth, .defaultToSpeaker])
26-
try session.setActive(true)
27-
28-
// 🔍 사용 가능한 오디오 입력 디바이스 탐색
29-
if let availableInputs = session.availableInputs {
30-
for input in availableInputs {
31-
print("🔎 입력 디바이스 발견: \(input.portType.rawValue)")
32-
if input.portType == .bluetoothHFP || input.portType == .bluetoothLE {
33-
try session.setPreferredInput(input)
34-
print("🎧 에어팟이 입력 디바이스로 설정되었습니다.")
35-
}
36-
}
37-
}
38-
39-
// ✅ 실제 하드웨어 포맷 가져오기
40-
let inputSampleRate = session.sampleRate
41-
let inputChannels = UInt32(session.inputNumberOfChannels)
42-
print("🎙️ 설정된 샘플레이트: \(inputSampleRate)")
43-
print("🎙️ 설정된 채널 수: \(inputChannels)")
44-
45-
// ✅ 샘플레이트를 16000으로 변환하도록 설정
46-
let inputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: inputSampleRate, channels: inputChannels, interleaved: false)!
47-
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: inputChannels, interleaved: false)!
48-
49-
converter = AVAudioConverter(from: inputFormat, to: outputFormat)
50-
48+
try session.setCategory(.playAndRecord, mode: .default, options: [.allowBluetooth, .defaultToSpeaker])
49+
try session.setPreferredSampleRate(48000)
50+
try session.setPreferredInputNumberOfChannels(1) // Mono로 강제 설정
51+
try session.setMode(.measurement)
52+
try session.setActive(true, options: .notifyOthersOnDeactivation)
53+
sampleRate = session.sampleRate
54+
channels = UInt32(session.inputNumberOfChannels)
55+
print("🎙️ 설정된 샘플레이트: \(sampleRate)")
56+
print("🎙️ 설정된 채널 수: \(channels)")
5157
} catch {
5258
print("🔴 오디오 세션 설정 실패: \(error.localizedDescription)")
5359
}
@@ -57,13 +63,19 @@ class AudioStreamer {
5763
func startStreaming() {
5864
configureAudioSession()
5965

60-
let format = inputNode.outputFormat(forBus: 0)
61-
self.inputFormat = format
62-
66+
let format = AVAudioFormat(commonFormat: .pcmFormatFloat32,
67+
sampleRate: 48000,
68+
channels: channels,
69+
interleaved: true)
6370

64-
self.inputFormat = format
71+
guard let hardwareFormat = format else {
72+
print("⚠️ 오디오 포맷 생성 실패")
73+
return
74+
}
75+
76+
self.inputFormat = hardwareFormat
6577

66-
inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: format) { [weak self] buffer, _ in
78+
inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: hardwareFormat) { [weak self] buffer, _ in
6779
self?.processAudioBuffer(buffer)
6880
}
6981

@@ -75,58 +87,104 @@ class AudioStreamer {
7587
}
7688
}
7789

78-
// MARK: - 오디오 버퍼를 WebSocket으로 서버로 전송
90+
// MARK: - 오디오 버퍼를 WebSocket으로 전송
7991
/// Converts one tapped buffer with the stored converter and sends the result
/// to the WebSocket in 4096-byte chunks.
///
/// Bytes that do not fill a whole chunk are kept in `partialBuffer` and
/// prepended to the data produced by the next call.
///
/// - Parameter buffer: PCM buffer delivered by the input-node tap.
func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
    guard let converter = self.converter else {
        print("❌ 오디오 컨버터 생성 실패")
        return
    }

    // Nothing to convert; also avoids a 0/0 division in the RMS below.
    let frameCount = Int(buffer.frameLength)
    guard frameCount > 0 else { return }

    // Diagnostic only: RMS level of the first channel's samples.
    if let floatChannelData = buffer.floatChannelData {
        let samples = UnsafeBufferPointer(start: floatChannelData.pointee, count: frameCount)
        let sumOfSquares = samples.reduce(Float(0)) { $0 + $1 * $1 }
        let rms = (sumOfSquares / Float(frameCount)).squareRoot()
        print("🔊 오디오 RMS 값: \(rms)")

        // Warn when the signal is very quiet.
        if rms < 0.01 {
            print("⚠️ 볼륨이 너무 작습니다.")
        }
    }

    // Reuse the converter's own output format so the destination buffer always
    // matches what the converter writes (and no force-unwrap is needed).
    let outputFormat = converter.outputFormat

    // Size the destination from the sample-rate ratio so a larger-than-requested
    // tap buffer is not truncated; never go below the previous fixed capacity.
    let minimumCapacity: AVAudioFrameCount = 1600
    let inputRate = buffer.format.sampleRate
    let rateRatio = inputRate > 0 ? outputFormat.sampleRate / inputRate : 1
    let estimatedFrames = AVAudioFrameCount((Double(frameCount) * rateRatio).rounded(.up)) + 1
    let outputCapacity = max(minimumCapacity, estimatedFrames)

    guard let newBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputCapacity) else {
        print("❌ PCM Buffer 생성 실패")
        return
    }

    // Hand the tapped buffer to the converter exactly once. If the converter
    // asks for more input during this call, report `.noDataNow` so the same
    // audio is not consumed twice and the converter keeps its streaming state.
    var didSupplyInput = false
    let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
        if didSupplyInput {
            outStatus.pointee = .noDataNow
            return nil
        }
        didSupplyInput = true
        outStatus.pointee = .haveData
        return buffer
    }

    var conversionError: NSError?
    let status = converter.convert(to: newBuffer, error: &conversionError, withInputFrom: inputBlock)

    if status == .error || conversionError != nil {
        print("❌ 오디오 변환 실패: \(conversionError?.localizedDescription ?? "unknown error")")
        return
    }

    print("📝 변환된 Buffer Frame Length: \(newBuffer.frameLength), Sample Rate: \(newBuffer.format.sampleRate)")

    guard let audioData = convertToFloat32BytesLikePython(newBuffer) else { return }

    var pendingData = partialBuffer + audioData
    let chunkSize = 4096

    while pendingData.count >= chunkSize {
        // `removeFirst` turns `pendingData` into a slice whose startIndex is no
        // longer 0; copy each chunk into a fresh, zero-based Data before it
        // leaves this method.
        let chunk = Data(pendingData.prefix(chunkSize))
        audioWebSocket?.sendDataToServer(chunk)
        print("🔄 오디오 데이터 전송 성공: 4096 바이트")
        pendingData.removeFirst(chunkSize)
    }

    // Store the remainder rebased to index 0 for the next call.
    partialBuffer = Data(pendingData)
}
104151

105-
// MARK: - 32bit float PCM -> 16bit int PCM 변환
106-
func convertBufferTo16BitPCM(_ buffer: AVAudioPCMBuffer) -> Data? {
107-
guard let floatChannelData = buffer.floatChannelData else {
108-
print("floatChannelData is nil")
152+
153+
154+
// MARK: - Python의 bytes_to_float_array 메소드와 유사하게 변환
155+
/// Converts the Int16 samples of `buffer` into the raw bytes of normalized
/// Float32 samples.
///
/// Each sample is divided by 32768.0, mirroring the Python-side normalization
/// `value.astype(np.float32) / 32768.0`; the returned bytes are the Float32
/// array's in-memory representation.
///
/// - Parameter buffer: PCM buffer expected to expose `int16ChannelData`.
/// - Returns: `frameLength * 4` bytes of Float32 samples, or `nil` when the
///   buffer has no Int16 channel data.
func convertToFloat32BytesLikePython(_ buffer: AVAudioPCMBuffer) -> Data? {
    guard let int16ChannelData = buffer.int16ChannelData else {
        print("int16ChannelData is nil")
        return nil
    }

    // View the first channel's samples without copying.
    let sampleCount = Int(buffer.frameLength)
    let int16Samples = UnsafeBufferPointer(start: int16ChannelData.pointee, count: sampleCount)

    // Int16 -> Float32 normalization.
    let normalizedSamples: [Float32] = int16Samples.map { sample in
        Float32(Int16(littleEndian: sample)) / 32768.0
    }

    // Raw bytes of the Float32 array.
    let floatData = normalizedSamples.withUnsafeBufferPointer { Data(buffer: $0) }

    print("🔄 Python 스타일로 Float32로 변환 완료 - \(floatData.count) bytes")
    return floatData
}
124181

125182
// MARK: - 오디오 스트리밍 일시 정지
126183
/// Pauses streaming by removing the tap from the input node's bus 0.
///
/// Does nothing when streaming is already paused.
func pauseStreaming() {
    if isPaused {
        return
    }

    inputNode.removeTap(onBus: 0)
    isPaused = true
    print("⏸️ 오디오 스트리밍 일시 정지됨")
}
131189

132190
// MARK: - 오디오 스트리밍 재개
@@ -141,6 +199,7 @@ class AudioStreamer {
141199
self?.processAudioBuffer(buffer)
142200
}
143201
isPaused = false
202+
print("▶️ 오디오 스트리밍 재개됨")
144203
}
145204

146205
// MARK: - 오디오 스트리밍 중지
@@ -150,4 +209,3 @@ class AudioStreamer {
150209
print("🛑 AVAudioEngine 중지됨")
151210
}
152211
}
153-

0 commit comments

Comments
 (0)