@@ -1,3 +1,9 @@
+//  AudioStream.swift
+//  Lecture2Quiz
+//
+//  Created by 바견규 on 4/27/25.
+//
+
 import AVFoundation
 
 class AudioStreamer {
@@ -6,48 +12,48 @@ class AudioStreamer {
     private var inputFormat: AVAudioFormat?
     private var isPaused: Bool = false
     private var audioWebSocket: AudioWebSocket?
-    private var converter: AVAudioConverter?
+    private var partialBuffer = Data() // 🔄 stores leftover bytes that do not yet fill a chunk
 
     // Format matched to the WhisperLive settings
-    private var bufferSize: AVAudioFrameCount = 4096
+    private var bufferSize: AVAudioFrameCount = 1600 // based on 100 ms
     private var sampleRate: Double = 16000
     private var channels: UInt32 = 1
 
+    // 🔄 Audio converter used for resampling
+    private var converter: AVAudioConverter?
+
     init(webSocket: AudioWebSocket) {
         self.inputNode = engine.inputNode
         self.audioWebSocket = webSocket
+
+        // 💡 Set up the resampling formats
+        let inputFormat = inputNode.outputFormat(forBus: 0)
+        print("🔍 Input format: \(inputFormat)")
+
+        // 🔄 Create the 16 kHz Int16 format that WhisperLive expects
+        let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
+                                         sampleRate: 16000,
+                                         channels: 1,
+                                         interleaved: true)!
+
+        // 🔄 Create the audio converter
+        self.converter = AVAudioConverter(from: inputFormat, to: outputFormat)
+        self.inputFormat = outputFormat
     }
 
     // MARK: - Audio session configuration
     func configureAudioSession() {
         let session = AVAudioSession.sharedInstance()
         do {
-            try session.setCategory(.playAndRecord, mode: .voiceChat, options: [.allowBluetooth, .defaultToSpeaker])
-            try session.setActive(true)
-
-            // 🔍 Look for available audio input devices
-            if let availableInputs = session.availableInputs {
-                for input in availableInputs {
-                    print("🔎 Found input device: \(input.portType.rawValue)")
-                    if input.portType == .bluetoothHFP || input.portType == .bluetoothLE {
-                        try session.setPreferredInput(input)
-                        print("🎧 AirPods set as the input device.")
-                    }
-                }
-            }
-
-            // ✅ Read the actual hardware format
-            let inputSampleRate = session.sampleRate
-            let inputChannels = UInt32(session.inputNumberOfChannels)
-            print("🎙️ Configured sample rate: \(inputSampleRate)")
-            print("🎙️ Configured channel count: \(inputChannels)")
-
-            // ✅ Set up conversion of the sample rate to 16000
-            let inputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: inputSampleRate, channels: inputChannels, interleaved: false)!
-            let outputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: inputChannels, interleaved: false)!
-
-            converter = AVAudioConverter(from: inputFormat, to: outputFormat)
-
+            try session.setCategory(.playAndRecord, mode: .default, options: [.allowBluetooth, .defaultToSpeaker])
+            try session.setPreferredSampleRate(48000)
+            try session.setPreferredInputNumberOfChannels(1) // force mono input
+            try session.setMode(.measurement)
+            try session.setActive(true, options: .notifyOthersOnDeactivation)
+            sampleRate = session.sampleRate
+            channels = UInt32(session.inputNumberOfChannels)
+            print("🎙️ Configured sample rate: \(sampleRate)")
+            print("🎙️ Configured channel count: \(channels)")
         } catch {
             print("🔴 Failed to configure the audio session: \(error.localizedDescription)")
         }
@@ -57,13 +63,19 @@ class AudioStreamer {
     func startStreaming() {
         configureAudioSession()
 
-        let format = inputNode.outputFormat(forBus: 0)
-        self.inputFormat = format
-
+        let format = AVAudioFormat(commonFormat: .pcmFormatFloat32,
+                                   sampleRate: 48000,
+                                   channels: channels,
+                                   interleaved: true)
 
-        self.inputFormat = format
+        guard let hardwareFormat = format else {
+            print("⚠️ Failed to create the audio format")
+            return
+        }
+
+        self.inputFormat = hardwareFormat
 
-        inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: format) { [weak self] buffer, _ in
+        inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: hardwareFormat) { [weak self] buffer, _ in
             self?.processAudioBuffer(buffer)
         }
 
@@ -75,58 +87,104 @@ class AudioStreamer {
         }
     }
 
-    // MARK: - Send the audio buffer to the server over WebSocket
+    // MARK: - Send the audio buffer over WebSocket
     func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
-        guard let converter = converter else { return }
+        guard let converter = self.converter else {
+            print("❌ The audio converter was not created")
+            return
+        }
 
-        let outputBuffer = AVAudioPCMBuffer(pcmFormat: converter.outputFormat, frameCapacity: buffer.frameCapacity)!
-        var error: NSError?
+        // 🔍 Compute the RMS level
+        if let floatChannelData = buffer.floatChannelData {
+            let frameLength = Int(buffer.frameLength)
+            let channelDataValue = Array(UnsafeBufferPointer(start: floatChannelData.pointee, count: frameLength))
+
+            // 🔄 RMS calculation
+            let rms = sqrt(channelDataValue.map { $0 * $0 }.reduce(0, +) / Float(frameLength))
+            print("🔊 Audio RMS value: \(rms)")
+
+            // 🔍 Warn when the level is too low
+            if rms < 0.01 {
+                print("⚠️ The volume is too low.")
+            }
+        }
+
+        let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
+                                         sampleRate: 16000,
+                                         channels: 1,
+                                         interleaved: true)!
+
+        guard let newBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: 1600) else {
+            print("❌ Failed to create the PCM buffer")
+            return
+        }
 
-        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
+        let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
             outStatus.pointee = .haveData
             return buffer
         }
 
-        converter.convert(to: outputBuffer, error: &error, withInputFrom: inputBlock)
+        var error: NSError?
+        converter.convert(to: newBuffer, error: &error, withInputFrom: inputBlock)
 
         if let error = error {
-            print("🔴 Error during conversion: \(error.localizedDescription)")
+            print("❌ Audio conversion failed: \(error.localizedDescription)")
             return
         }
 
-        if let audioData = convertBufferTo16BitPCM(outputBuffer) {
-            print("🔄 Sending PCM data...")
-            audioWebSocket?.sendDataToServer(audioData)
-        } else {
-            print("Error: failed to convert the audio buffer")
+        print("📝 Converted buffer frame length: \(newBuffer.frameLength), sample rate: \(newBuffer.format.sampleRate)")
+
+        if let audioData = convertToFloat32BytesLikePython(newBuffer) {
+            var completeData = partialBuffer + audioData
+            let chunkSize = 4096
+
+            while completeData.count >= chunkSize {
+                let chunk = completeData.prefix(chunkSize)
+                audioWebSocket?.sendDataToServer(chunk)
+                print("🔄 Audio data sent: 4096 bytes")
+                completeData.removeFirst(chunkSize)
+            }
+
+            partialBuffer = completeData
         }
     }
 
-    // MARK: - Convert 32-bit float PCM to 16-bit int PCM
-    func convertBufferTo16BitPCM(_ buffer: AVAudioPCMBuffer) -> Data? {
-        guard let floatChannelData = buffer.floatChannelData else {
-            print("floatChannelData is nil")
+
+
+    // MARK: - Convert the buffer the way Python's bytes_to_float_array method does
+    func convertToFloat32BytesLikePython(_ buffer: AVAudioPCMBuffer) -> Data? {
+        guard let int16ChannelData = buffer.int16ChannelData else {
+            print("int16ChannelData is nil")
             return nil
         }
 
-        let channelPointer = floatChannelData.pointee
         let frameLength = Int(buffer.frameLength)
-        var pcmData = Data(capacity: frameLength * MemoryLayout<Int16>.size)
-
+        let channelPointer = int16ChannelData.pointee
+
+        // Create the Float32 array
+        var floatArray = [Float32](repeating: 0, count: frameLength)
+
+        // Normalize Int16 -> Float32 (same approach as the Python code)
         for i in 0..<frameLength {
-            let sample = max(-1.0, min(1.0, channelPointer[i])) // clipping
-            var intSample = Int16(sample * Float(Int16.max))
-            pcmData.append(Data(bytes: &intSample, count: MemoryLayout<Int16>.size))
-        }
+            let int16Value = channelPointer[i]
+            // Python's normalization: value.astype(np.float32) / 32768.0
+            floatArray[i] = Float32(Int16(littleEndian: int16Value)) / 32768.0
 
-        return pcmData
+        }
+
+        // Convert the Float32 array to bytes (same as Python's tobytes())
+        let floatData = Data(bytes: floatArray, count: frameLength * MemoryLayout<Float32>.size)
+
+        print("🔄 Converted to Float32 Python-style - \(floatData.count) bytes")
+        return floatData
     }
 
     // MARK: - Pause audio streaming
     func pauseStreaming() {
         guard !isPaused else { return }
         inputNode.removeTap(onBus: 0)
         isPaused = true
+        print("⏸️ Audio streaming paused")
     }
 
     // MARK: - Resume audio streaming
@@ -141,6 +199,7 @@ class AudioStreamer {
             self?.processAudioBuffer(buffer)
         }
         isPaused = false
+        print("▶️ Audio streaming resumed")
     }
 
     // MARK: - Stop audio streaming
@@ -150,4 +209,3 @@ class AudioStreamer {
         print("🛑 AVAudioEngine stopped")
     }
 }
-
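For reference, a minimal usage sketch, not part of the commit: it relies only on what the diff shows (AudioStreamer.init(webSocket:), startStreaming(), pauseStreaming(), and an AudioWebSocket type exposing sendDataToServer(_:)); how AudioWebSocket itself is constructed is an assumption here and may differ in the app.

// Usage sketch (assumption: AudioWebSocket can be created with a plain initializer).
let socket = AudioWebSocket()                   // hypothetical construction; only sendDataToServer(_:) appears in the diff
let streamer = AudioStreamer(webSocket: socket)

streamer.startStreaming()   // configures the session, installs the mic tap, and streams 4096-byte Float32 chunks
// ...
streamer.pauseStreaming()   // removes the tap until streaming is resumed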