Skip to content

Commit 4e8e072

Browse files
authored
Add support for reading WAV files that contain MP3 data. (#447)
* Add support for compressed WAV files. * Clean up enums. * Add test for MP3-in-WAV. * clang-format * Fix fixture path.
1 parent e2b4fff commit 4e8e072

File tree

4 files changed

+261
-1
lines changed

4 files changed

+261
-1
lines changed

pedalboard/io/AudioFile.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "../juce_overrides/juce_PatchedFLACAudioFormat.h"
2121
#include "../juce_overrides/juce_PatchedMP3AudioFormat.h"
22+
#include "../juce_overrides/juce_PatchedWavAudioFormat.h"
2223
#include "AudioFile.h"
2324
#include "LameMP3AudioFormat.h"
2425

@@ -32,7 +33,7 @@ static constexpr const unsigned int DEFAULT_AUDIO_BUFFER_SIZE_FRAMES = 8192;
3233
*/
3334
void registerPedalboardAudioFormats(juce::AudioFormatManager &manager,
3435
bool forWriting) {
35-
manager.registerFormat(new juce::WavAudioFormat(), true);
36+
manager.registerFormat(new juce::PatchedWavAudioFormat(), true);
3637
manager.registerFormat(new juce::AiffAudioFormat(), false);
3738
manager.registerFormat(new juce::PatchedFlacAudioFormat(), false);
3839

pedalboard/juce_overrides/juce_PatchedWavAudioFormat.h

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
/*
2+
* pedalboard
3+
* Copyright 2022 Spotify AB
4+
*
5+
* Licensed under the GNU Public License, Version 3.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* https://www.gnu.org/licenses/gpl-3.0.html
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#pragma once
19+
20+
#include "../JuceHeader.h"
21+
#include "juce_PatchedMP3AudioFormat.h"
22+
23+
namespace juce {
24+
25+
/**
26+
* WAV format tags from mmreg.h / RFC 2361.
27+
* https://www.rfc-editor.org/rfc/rfc2361.html
28+
*/
29+
enum class WavFormatTag : unsigned short {
  /** Uncompressed integer PCM samples. */
  PCM = 0x0001,

  /** Microsoft ADPCM. */
  ADPCM = 0x0002,

  /** IEEE floating-point samples. */
  IEEEFloat = 0x0003,

  /** A-law companded samples. */
  ALaw = 0x0006,

  /** mu-law (u-law) companded samples. */
  MuLaw = 0x0007,

  /** IMA ADPCM (also known as DVI ADPCM). */
  IMAADPCM = 0x0011,

  /** GSM 6.10. */
  GSM610 = 0x0031,

  /** MPEG audio. */
  MPEG = 0x0050,

  /** MPEG Layer 3 (MP3) data, i.e. WAVE_FORMAT_MPEGLAYER3. */
  MPEGLayer3 = 0x0055,

  /** WAVE_FORMAT_EXTENSIBLE. */
  Extensible = 0xFFFE,
};
41+
42+
/**
43+
* A patched version of WavAudioFormat that adds support for WAV files
44+
* containing compressed audio data (e.g.: WAVE_FORMAT_MPEGLAYER3).
45+
*
46+
* These files are valid WAV files that use MP3 compression for the audio
47+
* data, wrapped in a standard RIFF/WAV container.
48+
*/
49+
class JUCE_API PatchedWavAudioFormat : public WavAudioFormat {
public:
  PatchedWavAudioFormat() = default;
  ~PatchedWavAudioFormat() override = default;

  /**
   * Creates a reader for the WAV data held in sourceStream.
   *
   * The RIFF/RF64 header is checked and the chunks that follow are scanned
   * for the "fmt " chunk, whose format tag decides what happens next:
   *  - WAVE_FORMAT_MPEGLAYER3: the "data" chunk is decoded as MP3.
   *  - a tag listed in getUnsupportedCodecName(): std::domain_error is thrown.
   *  - anything else (including a missing header or "fmt " chunk): the stream
   *    is rewound to where it started and WavAudioFormat handles it.
   *
   * @param sourceStream               the stream to read from.
   * @param deleteStreamIfOpeningFails whether sourceStream should be deleted
   *                                   when no reader can be created.
   * @return a new reader, or nullptr if none could be created.
   * @throws std::domain_error if the file uses a known-but-unsupported codec
   *         (sourceStream is deleted first if deleteStreamIfOpeningFails).
   */
  AudioFormatReader *createReaderFor(InputStream *sourceStream,
                                     bool deleteStreamIfOpeningFails) override {
    const auto streamStartPos = sourceStream->getPosition();

    // Rewinds to the original position and hands the stream to the parent
    // WavAudioFormat implementation.
    auto useDefaultReader = [&]() {
      sourceStream->setPosition(streamStartPos);
      return WavAudioFormat::createReaderFor(sourceStream,
                                             deleteStreamIfOpeningFails);
    };

    // Read the RIFF header
    const auto firstChunkType = sourceStream->readInt();
    if (firstChunkType != chunkName("RIFF") &&
        firstChunkType != chunkName("RF64")) {
      return useDefaultReader();
    }

    // Skip the size field
    sourceStream->readInt();

    // Check for WAVE identifier
    if (sourceStream->readInt() != chunkName("WAVE")) {
      return useDefaultReader();
    }

    // Look for the fmt chunk to check the format tag
    while (!sourceStream->isExhausted()) {
      const auto chunkType = sourceStream->readInt();
      const auto length = static_cast<uint32>(sourceStream->readInt());
      // Odd-sized chunks are followed by one byte of padding.
      const auto chunkEnd =
          sourceStream->getPosition() + length + (length & 1);

      if (chunkType == chunkName("fmt ")) {
        const auto format =
            static_cast<unsigned short>(sourceStream->readShort());

        switch (static_cast<WavFormatTag>(format)) {
        case WavFormatTag::MPEGLayer3:
          return createMP3ReaderForWav(sourceStream, chunkEnd, streamStartPos,
                                       deleteStreamIfOpeningFails);

        default: {
          // Check for known-but-unsupported formats and throw helpful errors
          const char *unsupportedCodecName = getUnsupportedCodecName(format);
          if (unsupportedCodecName != nullptr) {
            if (deleteStreamIfOpeningFails)
              delete sourceStream;
            throw std::domain_error(
                "This WAV file uses the " + std::string(unsupportedCodecName) +
                " audio codec (format tag 0x" + toHexString(format) +
                "), which is not supported. "
                "Please convert the file to a standard PCM WAV, FLAC, or MP3 "
                "format before loading.");
          }

          // All other formats: delegate to JUCE's WavAudioFormat
          return useDefaultReader();
        }
        }
      }

      sourceStream->setPosition(chunkEnd);
    }

    // Couldn't find fmt chunk - let JUCE handle it
    return useDefaultReader();
  }

private:
  /**
   * Exposes one region of a WAV stream (the MP3 payload of its "data" chunk)
   * as a stream of its own, with ownership of the underlying WAV stream that
   * can be switched on after construction.
   *
   * SubregionStream fixes its ownership flag at construction time, but whether
   * the WAV stream must be deleted is only known once the MP3 reader has
   * either been created (the reader then owns the stream) or has failed (the
   * caller's deleteStreamIfOpeningFails choice applies).
   */
  class MP3RegionStream final : public InputStream {
  public:
    MP3RegionStream(InputStream *wavStream, int64 regionStart,
                    int64 regionLength, bool ownsWavStream)
        : source(wavStream), region(wavStream, regionStart, regionLength, false),
          ownsSource(ownsWavStream) {}

    ~MP3RegionStream() override {
      // When the WAV stream is not ours, let go of it so that the unique_ptr
      // does not delete it.
      if (!ownsSource)
        static_cast<void>(source.release());
    }

    /** Makes this object delete the WAV stream when it is destroyed. */
    void takeOwnershipOfSource() noexcept { ownsSource = true; }

    int64 getTotalLength() override { return region.getTotalLength(); }
    int64 getPosition() override { return region.getPosition(); }
    bool setPosition(int64 newPosition) override {
      return region.setPosition(newPosition);
    }
    int read(void *destBuffer, int maxBytesToRead) override {
      return region.read(destBuffer, maxBytesToRead);
    }
    bool isExhausted() override { return region.isExhausted(); }

  private:
    // Declared before `region` so that it is destroyed after it.
    std::unique_ptr<InputStream> source;
    SubregionStream region;
    bool ownsSource;
  };

  /**
   * Creates an MP3 reader for a WAV file containing MP3-compressed audio data.
   * Scans forward from the end of the fmt chunk for the data chunk and wraps
   * it in a region stream for the MP3 decoder.
   *
   * On success the returned reader owns sourceStream, whatever the value of
   * deleteStreamIfOpeningFails. On failure sourceStream is deleted only if
   * deleteStreamIfOpeningFails is true.
   *
   * @param sourceStream               the WAV stream.
   * @param fmtChunkEnd                position just past the fmt chunk.
   * @param streamStartPos             position the stream had on entry to
   *                                   createReaderFor(); restored if no data
   *                                   chunk is found.
   * @param deleteStreamIfOpeningFails whether to delete sourceStream when no
   *                                   reader can be created.
   * @return the MP3 reader, or nullptr if there is no data chunk or the MP3
   *         decoder rejects it.
   */
  AudioFormatReader *createMP3ReaderForWav(InputStream *sourceStream,
                                           int64 fmtChunkEnd,
                                           int64 streamStartPos,
                                           bool deleteStreamIfOpeningFails) {
    sourceStream->setPosition(fmtChunkEnd);

    while (!sourceStream->isExhausted()) {
      const auto dataChunkType = sourceStream->readInt();
      const auto dataLength = static_cast<uint32>(sourceStream->readInt());

      if (dataChunkType == chunkName("data")) {
        // Found the data chunk - the MP3 data starts here
        const auto dataStart = sourceStream->getPosition();

        // Until a reader exists, the region stream owns the WAV stream only
        // if the caller asked for it to be deleted on failure. Ownership of
        // the region stream itself passes to the MP3 format below.
        auto *mp3Region =
            std::make_unique<MP3RegionStream>(sourceStream, dataStart,
                                              dataLength,
                                              deleteStreamIfOpeningFails)
                .release();

        // Use the patched MP3 format to read the MP3 data. On failure it
        // deletes mp3Region, which must not be touched afterwards.
        PatchedMP3AudioFormat mp3Format;
        auto *reader = mp3Format.createReaderFor(mp3Region, true);

        // A successfully created reader owns its stream, so the WAV stream
        // must now be deleted together with the reader. Without this the WAV
        // stream would leak whenever deleteStreamIfOpeningFails is false.
        if (reader != nullptr)
          mp3Region->takeOwnershipOfSource();

        return reader;
      }

      // Not the data chunk: skip it, including the padding byte that follows
      // odd-sized chunks.
      sourceStream->setPosition(sourceStream->getPosition() + dataLength +
                                (dataLength & 1));
    }

    // Couldn't find data chunk
    sourceStream->setPosition(streamStartPos);
    if (deleteStreamIfOpeningFails)
      delete sourceStream;
    return nullptr;
  }

  /**
   * Converts a four-character chunk identifier (e.g. "fmt ") into the integer
   * obtained by reading those four bytes as a little-endian int.
   */
  static constexpr int chunkName(const char *name) noexcept {
    return static_cast<int>(ByteOrder::littleEndianInt(name));
  }

  /** Formats value as upper-case hexadecimal, zero-padded to four digits. */
  static std::string toHexString(unsigned short value) {
    char buf[8];
    snprintf(buf, sizeof(buf), "%04X", value);
    return std::string(buf);
  }

  /**
   * Returns a human-readable name for known-but-unsupported WAV codec formats.
   * Returns nullptr for every other format tag; createReaderFor() then
   * delegates to WavAudioFormat.
   */
  static const char *getUnsupportedCodecName(unsigned short format) {
    // clang-format off
    // Format tags from mmreg.h / RFC 2361: https://www.rfc-editor.org/rfc/rfc2361.html
    switch (format) {
      case 0x0002: return "Microsoft ADPCM";
      case 0x0006: return "A-law";
      case 0x0007: return "mu-law (u-law)";
      case 0x0010: return "OKI ADPCM";
      case 0x0011: return "IMA ADPCM (DVI ADPCM)";
      case 0x0012: return "MediaSpace ADPCM";
      case 0x0013: return "Sierra ADPCM";
      case 0x0014: return "G.723 ADPCM";
      case 0x0015: return "DIGISTD";
      case 0x0016: return "DIGIFIX";
      case 0x0017: return "Dialogic OKI ADPCM";
      case 0x0020: return "Yamaha ADPCM";
      case 0x0021: return "SONARC";
      case 0x0022: return "DSP Group TrueSpeech";
      case 0x0023: return "ECHOSC1";
      case 0x0024: return "Audiofile AF36";
      case 0x0025: return "APTX";
      case 0x0026: return "Audiofile AF10";
      case 0x0030: return "Dolby AC-2";
      case 0x0031: return "GSM 6.10";
      case 0x0040: return "G.721 ADPCM";
      case 0x0041: return "G.728 CELP";
      case 0x0050: return "MPEG";
      case 0x0052: return "RT24";
      case 0x0053: return "PAC";
      case 0x0061: return "G.726 ADPCM";
      case 0x0062: return "G.722 ADPCM";
      case 0x0064: return "G.722.1";
      case 0x0065: return "G.728";
      case 0x0066: return "G.726";
      case 0x0067: return "G.722";
      case 0x0069: return "G.729";
      case 0x0070: return "VSELP";
      case 0x0075: return "VOXWARE";
      case 0x00FF: return "AAC";
      case 0x0111: return "VIVO G.723";
      case 0x0112: return "VIVO Siren";
      case 0x0160: return "Windows Media Audio v1";
      case 0x0161: return "Windows Media Audio v2";
      case 0x0162: return "Windows Media Audio Pro";
      case 0x0163: return "Windows Media Audio Lossless";
      case 0x0200: return "Creative ADPCM";
      case 0x0202: return "Creative FastSpeech8";
      case 0x0203: return "Creative FastSpeech10";
      case 0x1000: return "Olivetti GSM";
      case 0x1001: return "Olivetti ADPCM";
      case 0x1002: return "Olivetti CELP";
      case 0x1003: return "Olivetti SBC";
      case 0x1004: return "Olivetti OPR";
      case 0x1100: return "LH Codec";
      case 0x1400: return "Norris";
      case 0x1500: return "SoundSpace Musicompress";
      case 0x2000: return "Dolby AC-3 (SPDIF)";
      case 0x2001: return "DTS";
      default:     return nullptr;
    }
    // clang-format on
  }

  JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR(PatchedWavAudioFormat)
};
238+
239+
} // namespace juce

tests/audio/correct/mp3_in_wav.wav

41.9 KB
Binary file not shown.

tests/test_io.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,26 @@ def test_real_mp3_parsing_with_no_channels():
12901290
assert f.read(f.frames).shape == (0, 0)
12911291

12921292

1293+
def test_mp3_in_wav_format():
    """
    WAV containers may carry MP3-compressed audio (WAVE_FORMAT_MPEGLAYER3,
    format tag 0x55). Such files are valid but unusual, and some audio
    software produces them.

    Checks that the fixture opens with the expected sample rate, channel
    count and minimum length, and that the decoded audio is not silent.
    """
    fixture_dir = os.path.join(os.path.dirname(__file__), "audio", "correct")
    wav_path = os.path.join(fixture_dir, "mp3_in_wav.wav")

    with pedalboard.io.AudioFile(wav_path) as wav_file:
        assert f"{wav_file.samplerate}" and wav_file.samplerate == 44100
        assert wav_file.num_channels == 1
        # The fixture holds at least one second of audio.
        assert wav_file.frames >= 44100

        # Decode the whole file and make sure real audio came out.
        samples = wav_file.read(wav_file.frames)
        channel_count = samples.shape[0]
        assert channel_count == 1
        peak = np.amax(np.abs(samples))
        assert peak > 0.1
1311+
1312+
12931313
@pytest.mark.parametrize("samplerate", [44100, 32000])
12941314
@pytest.mark.parametrize("chunk_size", [1, 2, 16])
12951315
@pytest.mark.parametrize("target_samplerate", [44100, 32000, 22050, 1234.56])

0 commit comments

Comments
 (0)