diff --git a/README.md b/README.md index 792e976b4..8d2b2da03 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,25 @@ with AudioFile('processed-output.wav', 'w', samplerate, effected.shape[0]) as f: f.write(effected) ``` +### Resampling and Channel Conversion + +Audio files can be resampled on-the-fly and have their channels converted +for maximum efficiency using chainable methods: + +```python +from pedalboard.io import AudioFile + +# Read a file, resampling to 22,050 Hz and converting to mono: +with AudioFile('some-file.mp3').resampled_to(22_050).mono() as f: + audio = f.read(f.frames) + print(f.samplerate) # => 22050 + print(f.num_channels) # => 1 + +# Resampling and channel conversion can be done in either order: +with AudioFile('some-file.mp3').mono().resampled_to(22_050) as f: + audio = f.read(f.frames) # Also works! (And may be faster for stereo inputs) +``` + ### Using VST3® or Audio Unit instrument and effect plugins ```python diff --git a/docs/source/reference/pedalboard.io.rst b/docs/source/reference/pedalboard.io.rst index f664969bc..d5d0e9a2b 100644 --- a/docs/source/reference/pedalboard.io.rst +++ b/docs/source/reference/pedalboard.io.rst @@ -30,11 +30,54 @@ a regular file in Python:: - Writing to a file can be accomplished by passing ``"w"`` as a second argument, just like with regular files in Python. - Changing the sample rate of a file can be accomplished by calling :py:meth:`pedalboard.io.ReadableAudioFile.resampled_to`. + - Changing the number of channels can be accomplished by calling :py:meth:`pedalboard.io.ReadableAudioFile.mono`, :py:meth:`pedalboard.io.ReadableAudioFile.stereo`, or :py:meth:`pedalboard.io.ReadableAudioFile.with_channels`. 
If you find yourself importing :class:`pedalboard.io.ReadableAudioFile`, - :class:`pedalboard.io.WriteableAudioFile`, or :class:`pedalboard.io.ResampledReadableAudioFile` directly, + :class:`pedalboard.io.WriteableAudioFile`, :class:`pedalboard.io.ResampledReadableAudioFile`, + or :class:`pedalboard.io.ChannelConvertedReadableAudioFile` directly, you *probably don't need to do that* - :class:`pedalboard.io.AudioFile` has you covered. +Resampling and Channel Conversion +--------------------------------- + +Audio files can be resampled and have their channel count converted on-the-fly +using chainable methods. These operations stream audio efficiently without +loading the entire file into memory:: + + from pedalboard.io import AudioFile + + # Resample a file to 22,050 Hz: + with AudioFile("my_file.mp3").resampled_to(22_050) as f: + audio = f.read(f.frames) # audio is now at 22,050 Hz + + # Convert a stereo file to mono: + with AudioFile("stereo_file.wav").mono() as f: + audio = f.read(f.frames) # audio is now shape (1, num_samples) + + # Convert a mono file to stereo: + with AudioFile("mono_file.wav").stereo() as f: + audio = f.read(f.frames) # audio is now shape (2, num_samples) + +These methods can be chained together in any order:: + + # Resample and convert to mono: + with AudioFile("my_file.mp3").resampled_to(22_050).mono() as f: + audio = f.read(f.frames) + + # Or convert to mono first, then resample (slightly more efficient): + with AudioFile("my_file.mp3").mono().resampled_to(22_050) as f: + audio = f.read(f.frames) + +.. note:: + Channel conversion is only well-defined for conversions to and from mono. + Converting between stereo and multichannel formats (e.g., 5.1 surround) + is not supported, as the mapping between channels is ambiguous. 
+ To convert multichannel audio to stereo, first convert to mono:: + + # Convert 5.1 surround to stereo via mono: + with AudioFile("surround.wav").mono().stereo() as f: + audio = f.read(f.frames) + The following documentation lists all of the available I/O classes. diff --git a/pedalboard/io/AudioFile.h b/pedalboard/io/AudioFile.h index 116d9a7fc..5e3ac94c2 100644 --- a/pedalboard/io/AudioFile.h +++ b/pedalboard/io/AudioFile.h @@ -17,22 +17,32 @@ #pragma once +#include +#include + +#include +#include + #include "../juce_overrides/juce_PatchedFLACAudioFormat.h" #include "../juce_overrides/juce_PatchedMP3AudioFormat.h" #include "../juce_overrides/juce_PatchedWavAudioFormat.h" -#include "AudioFile.h" #include "LameMP3AudioFormat.h" +namespace py = pybind11; + namespace Pedalboard { +// Forward declaration +class PythonInputStream; + static constexpr const unsigned int DEFAULT_AUDIO_BUFFER_SIZE_FRAMES = 8192; /** * Registers audio formats for reading and writing in a deterministic (but * configurable) order. */ -void registerPedalboardAudioFormats(juce::AudioFormatManager &manager, - bool forWriting) { +inline void registerPedalboardAudioFormats(juce::AudioFormatManager &manager, + bool forWriting) { manager.registerFormat(new juce::PatchedWavAudioFormat(), true); manager.registerFormat(new juce::AiffAudioFormat(), false); manager.registerFormat(new juce::PatchedFlacAudioFormat(), false); @@ -57,6 +67,61 @@ void registerPedalboardAudioFormats(juce::AudioFormatManager &manager, #endif } +/** + * Base marker class for all audio file types. + */ class AudioFile {}; +/** + * Abstract interface for readable audio files. + * + * This interface defines the common API shared by ReadableAudioFile, + * ResampledReadableAudioFile, and ChannelConvertedReadableAudioFile, + * allowing them to be used interchangeably. 
+ */ +class AbstractReadableAudioFile : public AudioFile { +public: + virtual ~AbstractReadableAudioFile() = default; + + // Sample rate and duration + virtual std::variant getSampleRate() const = 0; + virtual double getSampleRateAsDouble() const = 0; + virtual long long getLengthInSamples() const = 0; + virtual double getDuration() const = 0; + + // Channel info + virtual long getNumChannels() const = 0; + + // File metadata + virtual bool exactDurationKnown() const = 0; + virtual std::string getFileFormat() const = 0; + virtual std::string getFileDatatype() const = 0; + + // Reading + virtual py::array_t + read(std::variant numSamples) = 0; + + // Seeking + virtual void seek(long long position) = 0; + virtual void seekInternal(long long position) = 0; + virtual long long tell() const = 0; + + // State + virtual void close() = 0; + virtual bool isClosed() const = 0; + virtual bool isSeekable() const = 0; + + // File info + virtual std::optional getFilename() const = 0; + virtual PythonInputStream *getPythonInputStream() const = 0; + + // Context manager support + virtual std::shared_ptr enter() = 0; + virtual void exit(const py::object &type, const py::object &value, + const py::object &traceback) = 0; + + // For __repr__ + virtual std::string getClassName() const = 0; +}; + } // namespace Pedalboard diff --git a/pedalboard/io/AudioFileInit.h b/pedalboard/io/AudioFileInit.h index f447277e2..2a424e677 100644 --- a/pedalboard/io/AudioFileInit.h +++ b/pedalboard/io/AudioFileInit.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -137,6 +138,161 @@ Re-encoding a WAV file as an MP3 in four lines of Python:: )"); } +inline py::class_> +declare_ireadable_audio_file(py::module &m) { + return py::class_>( + m, "AbstractReadableAudioFile", + R"(An abstract base class for readable audio files. + +This class defines the common interface shared by :class:`ReadableAudioFile`, +:class:`ResampledReadableAudioFile`, and :class:`ChannelConvertedReadableAudioFile`. 
+ +*Introduced in v0.9.17.* +)"); +} + +inline void +init_ireadable_audio_file(py::class_> + &pyAbstractReadableAudioFile) { + pyAbstractReadableAudioFile + .def("read", &AbstractReadableAudioFile::read, py::arg("num_frames") = 0, + R"( +Read the given number of frames (samples in each channel) from this audio file +at its current position. + +``num_frames`` is a required argument, as audio files can be deceptively large. (Consider that +an hour-long ``.ogg`` file may be only a handful of megabytes on disk, but may decompress to +nearly a gigabyte in memory.) Audio files should be read in chunks, rather than all at once, to avoid +hard-to-debug memory problems and out-of-memory crashes. + +Audio samples are returned as a multi-dimensional :class:`numpy.array` with the shape +``(channels, samples)``; i.e.: a stereo audio file will have shape ``(2, )``. +Returned data is always in the ``float32`` datatype. + +If the file does not contain enough audio data to fill ``num_frames``, the returned +:class:`numpy.array` will contain as many frames as could be read from the file. (In some cases, +passing :py:attr:`frames` as ``num_frames`` may still return less data than expected. See documentation +for :py:attr:`frames` and :py:attr:`exact_duration_known` for more information about situations +in which this may occur.) + +For most (but not all) audio files, the minimum possible sample value will be ``-1.0f`` and the +maximum sample value will be ``+1.0f``. + +.. note:: + For convenience, the ``num_frames`` argument may be a floating-point number. However, if the + provided number of frames contains a fractional part (i.e.: ``1.01`` instead of ``1.00``) then + an exception will be thrown, as a fractional number of samples cannot be returned. 
+)") + .def("seekable", &AbstractReadableAudioFile::isSeekable, + "Returns True if this file is currently open and calls to seek() " + "will work.") + .def("seek", &AbstractReadableAudioFile::seek, py::arg("position"), + "Seek this file to the provided location in frames. Future reads " + "will start from this position.") + .def("tell", &AbstractReadableAudioFile::tell, + "Return the current position of the read pointer in this audio " + "file, in frames. This value will increase as :meth:`read` is " + "called, and may decrease if :meth:`seek` is called.") + .def("close", &AbstractReadableAudioFile::close, + "Close this file, rendering this object unusable.") + .def_property_readonly("name", &AbstractReadableAudioFile::getFilename, + "The name of this file.\n\nIf this file was " + "opened from a file-like object, this will be " + "``None``.") + .def_property_readonly("closed", &AbstractReadableAudioFile::isClosed, + "True iff this file is closed (and no longer " + "usable), False otherwise.") + .def_property_readonly( + "samplerate", &AbstractReadableAudioFile::getSampleRate, + "The sample rate of this file in samples (per channel) per second " + "(Hz). Sample rates are represented as floating-point numbers by " + "default, but this property will be an integer if the file's sample " + "rate has no fractional part.") + .def_property_readonly("num_channels", + &AbstractReadableAudioFile::getNumChannels, + "The number of channels in this file.") + .def_property_readonly("exact_duration_known", + &AbstractReadableAudioFile::exactDurationKnown, + R"( +Returns :py:const:`True` if this file's :py:attr:`frames` and +:py:attr:`duration` attributes are exact values, or :py:const:`False` if the +:py:attr:`frames` and :py:attr:`duration` attributes are estimates based +on the file's size and bitrate. + +:py:attr:`exact_duration_known` will change from :py:const:`False` to +:py:const:`True` as the file is read to completion. 
Once :py:const:`True`, +this value will not change back to :py:const:`False` for the same +:py:class:`AudioFile` object (even after calls to :meth:`seek`). + +.. note:: + :py:attr:`exact_duration_known` will only ever be :py:const:`False` + when reading certain MP3 files. For files in other formats than MP3, + :py:attr:`exact_duration_known` will always be equal to :py:const:`True`. + +*Introduced in v0.7.2.* +)") + .def_property_readonly( + "frames", &AbstractReadableAudioFile::getLengthInSamples, + "The total number of frames (samples per " + "channel) in this file.\n\nFor example, " + "if this file contains 10 seconds of stereo audio at sample " + "rate of 44,100 Hz, ``frames`` will return ``441,000``.\n\n.. " + "warning::\n When reading certain MP3 files, the " + ":py:attr:`frames` and :py:attr:`duration` properties may " + "initially be estimates and **may change as the file is read**. " + "See the documentation for :py:attr:`.ReadableAudioFile.frames` " + "for more details.") + .def_property_readonly( + "duration", &AbstractReadableAudioFile::getDuration, + "The duration of this file in seconds (``frames`` " + "divided by ``samplerate``).\n\n.. " + "warning::\n When reading certain MP3 files, the " + ":py:attr:`frames` and :py:attr:`duration` properties may " + "initially be estimates and **may change as the file is read**. 
" + "See the documentation for :py:attr:`.ReadableAudioFile.frames` " + "for more details.") + .def_property_readonly( + "file_dtype", &AbstractReadableAudioFile::getFileDatatype, + "The data type (``\"int16\"``, ``\"float32\"``, etc) stored " + "natively by this file.\n\nNote that :meth:`read` will always " + "return a ``float32`` array, regardless of the value of this " + "property.") + .def("__enter__", &AbstractReadableAudioFile::enter, + "Use this file as a context manager, automatically closing the file " + "and releasing resources when the context manager exits.") + .def("__exit__", &AbstractReadableAudioFile::exit, + "Stop using this file as a context manager, close the file, and " + "release its resources.") + .def("__repr__", [](const AbstractReadableAudioFile &file) { + std::ostringstream ss; + ss << "empty()) { + ss << " filename=\"" << *file.getFilename() << "\""; + } else if (PythonInputStream *stream = file.getPythonInputStream()) { + ss << " file_like=" << stream->getRepresentation(); + } + + // Always show properties (they're cached and available even after + // close) + ss << " samplerate=" << file.getSampleRateAsDouble(); + ss << " num_channels=" << file.getNumChannels(); + ss << " frames=" << file.getLengthInSamples(); + ss << " file_dtype=" << file.getFileDatatype(); + + if (file.isClosed()) { + ss << " closed"; + } + + ss << " at " << &file; + ss << ">"; + return ss.str(); + }); +} + inline void init_audio_file( py::class_> &pyAudioFile) { /** @@ -324,4 +480,17 @@ programs. :class:`AudioFile` class in write (``"w"``) mode instead. 
)"); } + +// Forward declarations - these classes must be defined before calling this +// function +class ResampledReadableAudioFile; +class ChannelConvertedReadableAudioFile; + +// This function must be called after ResampledReadableAudioFile and +// ChannelConvertedReadableAudioFile are defined +inline void init_abstract_readable_audio_file_methods( + py::class_> + &pyAbstractReadableAudioFile); + } // namespace Pedalboard diff --git a/pedalboard/io/ChannelConvertedReadableAudioFile.h b/pedalboard/io/ChannelConvertedReadableAudioFile.h new file mode 100644 index 000000000..5550e12a6 --- /dev/null +++ b/pedalboard/io/ChannelConvertedReadableAudioFile.h @@ -0,0 +1,421 @@ +/* + * pedalboard + * Copyright 2026 Spotify AB + * + * Licensed under the GNU Public License, Version 3.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.gnu.org/licenses/gpl-3.0.html + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include "../BufferUtils.h" +#include "../JuceHeader.h" +#include "AudioFile.h" +#include "PythonInputStream.h" +#include "ReadableAudioFile.h" +#include "ResampledReadableAudioFile.h" + +namespace py = pybind11; + +namespace Pedalboard { + +/** + * A wrapper class that converts audio channel counts on-the-fly. + * Wraps any AbstractReadableAudioFile (ReadableAudioFile, + * ResampledReadableAudioFile, etc.) 
+ */ +class ChannelConvertedReadableAudioFile + : public AbstractReadableAudioFile, + public std::enable_shared_from_this { +public: + ChannelConvertedReadableAudioFile( + std::shared_ptr audioFile, + int targetNumChannels) + : wrappedFile(audioFile), targetNumChannels(targetNumChannels) { + if (targetNumChannels < 1) { + throw std::domain_error("Target number of channels must be at least 1."); + } + + int sourceNumChannels = wrappedFile->getNumChannels(); + + // Only allow well-defined channel conversions: + // - Any -> mono (average all channels) + // - Mono -> any (duplicate mono to all channels) + // - Same channel count (no conversion needed) + // Disallow ambiguous conversions like stereo <-> multichannel + if (sourceNumChannels != targetNumChannels && sourceNumChannels != 1 && + targetNumChannels != 1) { + throw std::domain_error( + "Channel conversion from " + std::to_string(sourceNumChannels) + + " to " + std::to_string(targetNumChannels) + + " channels is not supported. Only conversions to/from mono (1 " + "channel) are well-defined. 
To convert to mono first, use " + ".mono().with_channels(" + + std::to_string(targetNumChannels) + ")."); + } + } + + std::variant getSampleRate() const override { + return wrappedFile->getSampleRate(); + } + + double getSampleRateAsDouble() const override { + return wrappedFile->getSampleRateAsDouble(); + } + + long long getLengthInSamples() const override { + return wrappedFile->getLengthInSamples(); + } + + double getDuration() const override { return wrappedFile->getDuration(); } + + long getNumChannels() const override { return targetNumChannels; } + + bool exactDurationKnown() const override { + return wrappedFile->exactDurationKnown(); + } + + std::string getFileFormat() const override { + return wrappedFile->getFileFormat(); + } + + std::string getFileDatatype() const override { + return wrappedFile->getFileDatatype(); + } + + py::array_t + read(std::variant numSamplesVariant) override { + long long numSamples = parseNumSamples(numSamplesVariant); + if (numSamples == 0) + throw std::domain_error( + "ChannelConvertedReadableAudioFile will not read an entire file " + "at once, " + "due to the possibility that a file may be larger than available " + "memory. Please pass a number of frames to read (available from " + "the 'frames' attribute)."); + + juce::AudioBuffer convertedBuffer; + { + py::gil_scoped_release release; + convertedBuffer = readInternal(numSamples); + } + + PythonException::raise(); + return copyJuceBufferIntoPyArray(convertedBuffer, + ChannelLayout::NotInterleaved, 0); + } + + /** + * Read samples from the underlying audio file, convert channels, and return a + * juce::AudioBuffer containing the result without holding the GIL. + * + * @param numSamples The number of samples to read. + * @return juce::AudioBuffer The resulting audio. 
+ */ + juce::AudioBuffer readInternal(long long numSamples) { + // Note: We take a "write" lock here as calling readInternal will + // advance internal state: + ScopedTryWriteLock scopedTryWriteLock(objectLock); + if (!scopedTryWriteLock.isLocked()) { + throw std::runtime_error( + "Another thread is currently reading from this AudioFile. Note " + "that using multiple concurrent readers on the same AudioFile " + "object will produce nondeterministic results."); + } + + int sourceNumChannels = wrappedFile->getNumChannels(); + + // Read from the underlying file and copy data while holding the GIL + juce::AudioBuffer outputBuffer; + { + py::gil_scoped_acquire acquire; + py::array_t sourceArray = wrappedFile->read(numSamples); + + auto sourceInfo = sourceArray.request(); + long long actualSamplesRead = sourceInfo.shape[1]; + + if (actualSamplesRead == 0) { + return juce::AudioBuffer(targetNumChannels, 0); + } + + // Create output buffer and copy data while we still have access to the + // array + outputBuffer.setSize(targetNumChannels, actualSamplesRead); + float *sourcePtr = static_cast(sourceInfo.ptr); + copyChannelData(outputBuffer, sourcePtr, sourceNumChannels, + actualSamplesRead); + } + + return outputBuffer; + } + + void copyChannelData(juce::AudioBuffer &outputBuffer, float *sourcePtr, + int sourceNumChannels, long long actualSamplesRead) { + // Note: The constructor validates that only well-defined conversions are + // allowed (to/from mono, or same channel count), so we only need to handle + // those cases here. 
+ + if (targetNumChannels == sourceNumChannels) { + // No conversion needed, just copy + for (int c = 0; c < targetNumChannels; c++) { + outputBuffer.copyFrom(c, 0, sourcePtr + (c * actualSamplesRead), + actualSamplesRead); + } + } else if (targetNumChannels == 1) { + // Mix down to mono: average all channels (SIMD-optimized) + float *outputPtr = outputBuffer.getWritePointer(0); + float channelVolume = 1.0f / sourceNumChannels; + + // Start with first channel + juce::FloatVectorOperations::copy(outputPtr, sourcePtr, + (int)actualSamplesRead); + + // Add remaining channels + for (int c = 1; c < sourceNumChannels; c++) { + float *channelPtr = sourcePtr + (c * actualSamplesRead); + juce::FloatVectorOperations::add(outputPtr, channelPtr, + (int)actualSamplesRead); + } + + // Apply volume scaling + juce::FloatVectorOperations::multiply(outputPtr, channelVolume, + (int)actualSamplesRead); + } else { + // Upmix from mono (sourceNumChannels == 1): duplicate to all channels + float *monoPtr = sourcePtr; + for (int c = 0; c < targetNumChannels; c++) { + outputBuffer.copyFrom(c, 0, monoPtr, actualSamplesRead); + } + } + } + + void seek(long long targetPosition) override { + wrappedFile->seek(targetPosition); + PythonException::raise(); + } + + void seekInternal(long long targetPosition) override { + wrappedFile->seekInternal(targetPosition); + } + + long long tell() const override { return wrappedFile->tell(); } + + void close() override { + py::gil_scoped_release release; + ScopedTryWriteLock scopedTryWriteLock(objectLock); + if (!scopedTryWriteLock.isLocked()) { + throw std::runtime_error( + "Another thread is currently reading from this AudioFile; it cannot " + "be closed until the other thread completes its operation."); + } + _isClosed = true; + } + + bool isClosed() const override { + if (wrappedFile->isClosed()) + return true; + + py::gil_scoped_release release; + const juce::ScopedReadLock scopedReadLock(objectLock); + return _isClosed; + } + + bool isSeekable() const 
override { return wrappedFile->isSeekable(); } + + std::optional getFilename() const override { + return wrappedFile->getFilename(); + } + + PythonInputStream *getPythonInputStream() const override { + return wrappedFile->getPythonInputStream(); + } + + std::shared_ptr enter() override { + return shared_from_this(); + } + + void exit(const py::object &type, const py::object &value, + const py::object &traceback) override { + bool shouldThrow = PythonException::isPending(); + close(); + + if (shouldThrow || PythonException::isPending()) + throw py::error_already_set(); + } + + std::string getClassName() const override { + return "ChannelConvertedReadableAudioFile"; + } + +private: + const std::shared_ptr wrappedFile; + const int targetNumChannels; + juce::ReadWriteLock objectLock; + bool _isClosed = false; +}; + +inline py::class_> +declare_readable_audio_file_with_channel_conversion(py::module &m) { + return py::class_>( + m, "ChannelConvertedReadableAudioFile", + R"( +A class that wraps an audio file for reading, while converting +the audio stream on-the-fly to a new channel count. + +*Introduced in v0.9.22.* + +Reading, seeking, and all other basic file I/O operations are supported (except for +:meth:`read_raw`). + +:class:`ChannelConvertedReadableAudioFile` should usually +be used via the :meth:`with_channels` method on :class:`ReadableAudioFile` +or :class:`ResampledReadableAudioFile`: + +:: + + with AudioFile("my_stereo_file.mp3").mono() as f: + f.num_channels # => 1 + mono_audio = f.read(int(f.samplerate * 10)) + + with AudioFile("my_mono_file.wav").stereo() as f: + f.num_channels # => 2 + stereo_audio = f.read(int(f.samplerate * 10)) + + with AudioFile("my_file.wav").with_channels(6) as f: + f.num_channels # => 6 + surround_audio = f.read(int(f.samplerate * 10)) + +When converting from stereo (or multi-channel) to mono, all channels are +averaged together with equal weighting. 
When converting from mono to +stereo (or multi-channel), the mono signal is duplicated to all output +channels. Other conversions (stereo to multi-channel, multi-channel to +stereo, etc) are not currently supported. +)"); +} + +inline void init_readable_audio_file_with_channel_conversion( + py::module &m, + py::class_> + &pyChannelConvertedReadableAudioFile) { + // Note: Most methods are inherited from AbstractReadableAudioFile. + // We only define class-specific methods here. + pyChannelConvertedReadableAudioFile + .def( + py::init( + [](std::shared_ptr audioFile, + int targetNumChannels) -> ChannelConvertedReadableAudioFile * { + // This definition is only here to provide nice docstrings. + throw std::runtime_error( + "Internal error: __init__ should never be called, as this " + "class implements __new__."); + }), + py::arg("audio_file"), py::arg("num_channels")) + .def_static( + "__new__", + [](const py::object *, + std::shared_ptr audioFile, + int targetNumChannels) { + return std::make_shared( + audioFile, targetNumChannels); + }, + py::arg("cls"), py::arg("audio_file"), py::arg("num_channels")); +} + +// Implementation of init_abstract_readable_audio_file_methods declared in +// AudioFileInit.h +inline void init_abstract_readable_audio_file_methods( + py::class_> + &pyAbstractReadableAudioFile) { + pyAbstractReadableAudioFile + .def( + "resampled_to", + [](std::shared_ptr file, + double targetSampleRate, ResamplingQuality quality) + -> std::shared_ptr { + if (file->getSampleRateAsDouble() == targetSampleRate) + return file; + + return std::make_shared( + file, targetSampleRate, quality); + }, + py::arg("target_sample_rate"), + py::arg("quality") = ResamplingQuality::WindowedSinc32, + "Return a :class:`ResampledReadableAudioFile` that will " + "automatically resample this audio file to the provided " + "`target_sample_rate`, using a constant amount of memory.\n\nIf " + "`target_sample_rate` matches the existing sample rate of the file, " + "the original file 
will be returned.\n\n*Introduced in v0.6.0.*") + .def( + "with_channels", + [](std::shared_ptr file, + int targetNumChannels) + -> std::shared_ptr { + if (file->getNumChannels() == targetNumChannels) + return file; + + return std::make_shared( + file, targetNumChannels); + }, + py::arg("num_channels"), + "Return a :class:`ChannelConvertedReadableAudioFile` that will " + "automatically convert the channel count of this audio file to the " + "provided `num_channels`.\n\nIf `num_channels` matches the existing " + "channel count of the file, the original file will be " + "returned.\n\nWhen converting from stereo (or multi-channel) to " + "mono, all channels are averaged together with equal weighting. When " + "converting from mono to stereo (or multi-channel), the mono signal " + "is duplicated to all output channels.\n\n*Introduced in v0.9.17.*") + .def( + "mono", + [](std::shared_ptr file) + -> std::shared_ptr { + if (file->getNumChannels() == 1) + return file; + + return std::make_shared(file, 1); + }, + "Return a :class:`ChannelConvertedReadableAudioFile` that will " + "automatically convert this audio file to mono (1 channel).\n\nIf " + "this file is already mono, the original file will be " + "returned.\n\nWhen converting from stereo (or multi-channel) to " + "mono, all channels are averaged together with equal " + "weighting.\n\n*Introduced in v0.9.17.*") + .def( + "stereo", + [](std::shared_ptr file) + -> std::shared_ptr { + if (file->getNumChannels() == 2) + return file; + + return std::make_shared(file, 2); + }, + "Return a :class:`ChannelConvertedReadableAudioFile` that will " + "automatically convert this audio file to stereo (2 " + "channels).\n\nIf this file is already stereo, the original file " + "will be returned.\n\nWhen converting from mono to stereo, the mono " + "signal is duplicated to both channels. 
Converting directly from " + "multi-channel (3 or more channels) to stereo is not supported; use " + "``.mono().stereo()`` instead.\n\n*Introduced in v0.9.17.*"); +} + +} // namespace Pedalboard diff --git a/pedalboard/io/ReadableAudioFile.h b/pedalboard/io/ReadableAudioFile.h index 25275f679..e6f28d06b 100644 --- a/pedalboard/io/ReadableAudioFile.h +++ b/pedalboard/io/ReadableAudioFile.h @@ -57,7 +57,7 @@ inline long long parseNumSamples(std::variant numSamples) { } class ReadableAudioFile - : public AudioFile, + : public AbstractReadableAudioFile, public std::enable_shared_from_this { public: ReadableAudioFile(std::string filename) : filename(filename) { @@ -216,7 +216,7 @@ class ReadableAudioFile } } - std::variant getSampleRate() const { + std::variant getSampleRate() const override { double integerPart; double fractionalPart = std::modf(sampleRate, &integerPart); @@ -227,18 +227,20 @@ class ReadableAudioFile } } - double getSampleRateAsDouble() const { return sampleRate; } + double getSampleRateAsDouble() const override { return sampleRate; } - long long getLengthInSamples() const { + long long getLengthInSamples() const override { const juce::ScopedReadLock scopedLock(objectLock); return numFrames + (lengthCorrection ? 
*lengthCorrection : 0); } - double getDuration() const { return numFrames / getSampleRateAsDouble(); } + double getDuration() const override { + return numFrames / getSampleRateAsDouble(); + } - long getNumChannels() const { return numChannels; } + long getNumChannels() const override { return numChannels; } - std::string getFileFormat() const { + std::string getFileFormat() const override { const juce::ScopedReadLock scopedLock(objectLock); if (!reader) throw std::runtime_error("I/O operation on a closed file."); @@ -246,10 +248,10 @@ class ReadableAudioFile return reader->getFormatName().toStdString(); } - std::string getFileDatatype() const { return fileDatatype; } + std::string getFileDatatype() const override { return fileDatatype; } - py::array_t - read(std::variant numSamplesVariant) { + py::array_t + read(std::variant numSamplesVariant) override { long long numSamples = parseNumSamples(numSamplesVariant); if (numSamples == 0) @@ -570,12 +572,12 @@ class ReadableAudioFile return buffer; } - void seek(long long targetPosition) { + void seek(long long targetPosition) override { py::gil_scoped_release release; seekInternal(targetPosition); } - void seekInternal(long long targetPosition) { + void seekInternal(long long targetPosition) override { const juce::ScopedReadLock scopedReadLock(objectLock); if (!reader) throw std::runtime_error("I/O operation on a closed file."); @@ -605,13 +607,13 @@ class ReadableAudioFile currentPosition = targetPosition; } - long long tell() const { + long long tell() const override { py::gil_scoped_release release; const juce::ScopedReadLock scopedLock(objectLock); return currentPosition; } - void close() { + void close() override { ScopedTryWriteLock scopedTryWriteLock(objectLock); if (!scopedTryWriteLock.isLocked()) { throw std::runtime_error( @@ -623,13 +625,13 @@ class ReadableAudioFile reader.reset(); } - bool isClosed() const { + bool isClosed() const override { py::gil_scoped_release release; const juce::ScopedReadLock 
scopedLock(objectLock); return !reader; } - bool isSeekable() const { + bool isSeekable() const override { py::gil_scoped_release release; const juce::ScopedReadLock scopedLock(objectLock); @@ -638,7 +640,7 @@ class ReadableAudioFile return reader != nullptr; } - bool exactDurationKnown() const { + bool exactDurationKnown() const override { const juce::ScopedReadLock scopedLock(objectLock); if (juce::AudioFormatReaderWithPosition *approximateLengthReader = @@ -657,9 +659,9 @@ class ReadableAudioFile return true; } - std::optional getFilename() const { return filename; } + std::optional getFilename() const override { return filename; } - PythonInputStream *getPythonInputStream() const { + PythonInputStream *getPythonInputStream() const override { if (!filename.empty()) { return nullptr; } @@ -672,10 +674,12 @@ class ReadableAudioFile return (PythonInputStream *)reader->input; } - std::shared_ptr enter() { return shared_from_this(); } + std::shared_ptr enter() override { + return shared_from_this(); + } void exit(const py::object &type, const py::object &value, - const py::object &traceback) { + const py::object &traceback) override { bool shouldThrow = PythonException::isPending(); close(); @@ -683,6 +687,8 @@ class ReadableAudioFile throw py::error_already_set(); } + std::string getClassName() const override { return "ReadableAudioFile"; } + private: void throwReadError(long long currentPosition, long long numSamples, long long samplesRead = -1) { @@ -752,10 +758,10 @@ class ReadableAudioFile std::optional lengthCorrection = {}; }; -inline py::class_> declare_readable_audio_file(py::module &m) { - return py::class_>(m, "ReadableAudioFile", R"( A class that wraps an audio file for reading, with native support for Ogg Vorbis, @@ -789,11 +795,14 @@ Pedalboard.) 
} class ResampledReadableAudioFile; +class ChannelConvertedReadableAudioFile; inline void init_readable_audio_file( py::module &m, - py::class_> - &pyReadableAudioFile) { + py::class_> &pyReadableAudioFile) { + // Note: Most methods are inherited from AbstractReadableAudioFile. + // We only define class-specific methods here. pyReadableAudioFile .def(py::init([](std::string filename) -> ReadableAudioFile * { // This definition is only here to provide nice docstrings. @@ -838,32 +847,6 @@ inline void init_readable_audio_file( } }, py::arg("cls"), py::arg("file_like")) - .def("read", &ReadableAudioFile::read, py::arg("num_frames") = 0, R"( -Read the given number of frames (samples in each channel) from this audio file at its current position. - -``num_frames`` is a required argument, as audio files can be deceptively large. (Consider that -an hour-long ``.ogg`` file may be only a handful of megabytes on disk, but may decompress to -nearly a gigabyte in memory.) Audio files should be read in chunks, rather than all at once, to avoid -hard-to-debug memory problems and out-of-memory crashes. - -Audio samples are returned as a multi-dimensional :class:`numpy.array` with the shape -``(channels, samples)``; i.e.: a stereo audio file will have shape ``(2, )``. -Returned data is always in the ``float32`` datatype. - -If the file does not contain enough audio data to fill ``num_frames``, the returned -:class:`numpy.array` will contain as many frames as could be read from the file. (In some cases, -passing :py:attr:`frames` as ``num_frames`` may still return less data than expected. See documentation -for :py:attr:`frames` and :py:attr:`exact_duration_known` for more information about situations -in which this may occur.) - -For most (but not all) audio files, the minimum possible sample value will be ``-1.0f`` and the -maximum sample value will be ``+1.0f``. - -.. note:: - For convenience, the ``num_frames`` argument may be a floating-point number. 
However, if the - provided number of frames contains a fractional part (i.e.: ``1.01`` instead of ``1.00``) then - an exception will be thrown, as a fractional number of samples cannot be returned. -)") .def("read_raw", &ReadableAudioFile::readRaw, py::arg("num_frames") = 0, R"( Read the given number of frames (samples in each channel) from this audio file at its current position. @@ -888,165 +871,7 @@ in which this may occur.) For convenience, the ``num_frames`` argument may be a floating-point number. However, if the provided number of frames contains a fractional part (i.e.: ``1.01`` instead of ``1.00``) then an exception will be thrown, as a fractional number of samples cannot be returned. -)") - .def("seekable", &ReadableAudioFile::isSeekable, - "Returns True if this file is currently open and calls to seek() " - "will work.") - .def("seek", &ReadableAudioFile::seek, py::arg("position"), - "Seek this file to the provided location in frames. Future reads " - "will start from this position.") - .def("tell", &ReadableAudioFile::tell, - "Return the current position of the read pointer in this audio " - "file, in frames. 
This value will increase as :meth:`read` is " - "called, and may decrease if :meth:`seek` is called.") - .def("close", &ReadableAudioFile::close, - "Close this file, rendering this object unusable.") - .def("__enter__", &ReadableAudioFile::enter, - "Use this :class:`ReadableAudioFile` as a context manager, " - "automatically closing the file and releasing resources when the " - "context manager exits.") - .def("__exit__", &ReadableAudioFile::exit, - "Stop using this :class:`ReadableAudioFile` as a context manager, " - "close the file, release its resources.") - .def("__repr__", - [](const ReadableAudioFile &file) { - std::ostringstream ss; - ss << "empty()) { - ss << " filename=\"" << *file.getFilename() << "\""; - } else if (PythonInputStream *stream = - file.getPythonInputStream()) { - ss << " file_like=" << stream->getRepresentation(); - } - - ss << " samplerate=" << file.getSampleRateAsDouble(); - ss << " num_channels=" << file.getNumChannels(); - ss << " frames=" << file.getLengthInSamples(); - ss << " file_dtype=" << file.getFileDatatype(); - - if (file.isClosed()) { - ss << " closed"; - } - - ss << " at " << &file; - ss << ">"; - return ss.str(); - }) - .def_property_readonly( - "name", &ReadableAudioFile::getFilename, - "The name of this file.\n\nIf this :class:`ReadableAudioFile` was " - "opened from a file-like object, this will be ``None``.") - .def_property_readonly("closed", &ReadableAudioFile::isClosed, - "True iff this file is closed (and no longer " - "usable), False otherwise.") - .def_property_readonly( - "samplerate", &ReadableAudioFile::getSampleRate, - "The sample rate of this file in samples (per channel) per second " - "(Hz). 
Sample rates are represented as floating-point numbers by " - "default, but this property will be an integer if the file's sample " - "rate has no fractional part.") - .def_property_readonly("num_channels", &ReadableAudioFile::getNumChannels, - "The number of channels in this file.") - .def_property_readonly("exact_duration_known", - &ReadableAudioFile::exactDurationKnown, - R"( -Returns :py:const:`True` if this file's :py:attr:`frames` and -:py:attr:`duration` attributes are exact values, or :py:const:`False` if the -:py:attr:`frames` and :py:attr:`duration` attributes are estimates based -on the file's size and bitrate. - -If :py:attr:`exact_duration_known` is :py:const:`False`, this value will -change to :py:const:`True` once the file is read to completion. Once -:py:const:`True`, this value will not change back to :py:const:`False` -for the same :py:class:`AudioFile` object (even after calls to :meth:`seek`). - -.. note:: - :py:attr:`exact_duration_known` will only ever be :py:const:`False` - when reading certain MP3 files. For files in other formats than MP3, - :py:attr:`exact_duration_known` will always be equal to :py:const:`True`. - -*Introduced in v0.7.2.* -)") - .def_property_readonly("frames", &ReadableAudioFile::getLengthInSamples, - R"( -The total number of frames (samples per channel) in this file. - -For example, if this file contains 10 seconds of stereo audio at sample rate -of 44,100 Hz, ``frames`` will return ``441,000``. - -.. warning:: - When reading certain MP3 files that have been encoded in constant bitrate mode, - the :py:attr:`frames` and :py:attr:`duration` properties may initially be estimates - and **may change as the file is read**. The :py:attr:`exact_duration_known` - property indicates if the values of :py:attr:`frames` and :py:attr:`duration` - are estimates or exact values. - - This discrepancy is due to the fact that MP3 files are not required to have - headers that indicate the duration of the file. 
If an MP3 file is opened and a - ``Xing`` or ``Info`` header frame is not found, the initial value of the - :py:attr:`frames` and :py:attr:`duration` attributes are estimates based on the file's - bitrate and size. This may result in an overestimate of the file's duration - if there is additional data present in the file after the audio stream is finished. - - If the exact number of frames in the file is required, read the entire file - first before accessing the :py:attr:`frames` or :py:attr:`duration` properties. - This operation forces each frame to be parsed and guarantees that - :py:attr:`frames` and :py:attr:`duration` are correct, at the expense of - scanning the entire file:: - - with AudioFile("my_file.mp3") as f: - while f.tell() < f.frames: - f.read(f.samplerate * 60) - - # f.frames is now guaranteed to be exact, as the entire file has been read: - assert f.exact_duration_known == True - - f.seek(0) - num_channels, num_samples = f.read(f.frames).shape - assert num_samples == f.frames - - This behaviour is present in v0.7.2 and later; prior versions would - raise an exception when trying to read the ends of MP3 files that contained - trailing non-audio data and lacked ``Xing`` or ``Info`` headers. -)") - .def_property_readonly("duration", &ReadableAudioFile::getDuration, - R"( -The duration of this file in seconds (``frames`` divided by ``samplerate``). - -.. warning:: - :py:attr:`duration` may be an overestimate for certain MP3 files. - Use :py:attr:`exact_duration_known` property to determine if - :py:attr:`duration` is accurate. (See the documentation for the - :py:attr:`frames` attribute for more details.) -)") - .def_property_readonly( - "file_dtype", &ReadableAudioFile::getFileDatatype, - "The data type (``\"int16\"``, ``\"float32\"``, etc) stored " - "natively by this file.\n\nNote that :meth:`read` will always " - "return a ``float32`` array, regardless of the value of this " - "property. 
Use :meth:`read_raw` to read data from the file in its " - "``file_dtype``.") - .def( - "resampled_to", - [](std::shared_ptr file, double targetSampleRate, - ResamplingQuality quality) - -> std::variant, - std::shared_ptr> { - if (file->getSampleRateAsDouble() == targetSampleRate) - return {file}; - - return {std::make_shared( - file, targetSampleRate, quality)}; - }, - py::arg("target_sample_rate"), - py::arg("quality") = ResamplingQuality::WindowedSinc32, - "Return a :class:`ResampledReadableAudioFile` that will " - "automatically resample this :class:`ReadableAudioFile` to the " - "provided `target_sample_rate`, using a constant amount of " - "memory.\n\nIf `target_sample_rate` matches the existing sample rate " - "of the file, the original file will be returned.\n\n*Introduced in " - "v0.6.0.*"); +)"); m.def("get_supported_read_formats", []() { juce::AudioFormatManager manager; diff --git a/pedalboard/io/ResampledReadableAudioFile.h b/pedalboard/io/ResampledReadableAudioFile.h index ed0f2efbc..3a4cb80b1 100644 --- a/pedalboard/io/ResampledReadableAudioFile.h +++ b/pedalboard/io/ResampledReadableAudioFile.h @@ -68,16 +68,17 @@ static inline int inputBufferSizeFor(ResamplingQuality quality) { } class ResampledReadableAudioFile - : public AudioFile, + : public AbstractReadableAudioFile, public std::enable_shared_from_this { public: - ResampledReadableAudioFile(std::shared_ptr audioFile, - float targetSampleRate, ResamplingQuality quality) + ResampledReadableAudioFile( + std::shared_ptr audioFile, + float targetSampleRate, ResamplingQuality quality) : audioFile(audioFile), resampler(audioFile->getSampleRateAsDouble(), targetSampleRate, audioFile->getNumChannels(), quality) {} - std::variant getSampleRate() const { + std::variant getSampleRate() const override { py::gil_scoped_release release; const juce::ScopedReadLock scopedReadLock(objectLock); @@ -92,13 +93,13 @@ class ResampledReadableAudioFile } } - double getSampleRateAsDouble() const { + double 
getSampleRateAsDouble() const override { py::gil_scoped_release release; const juce::ScopedReadLock scopedReadLock(objectLock); return resampler.getTargetSampleRate(); } - long getLengthInSamples() const { + long long getLengthInSamples() const override { double underlyingLengthInSamples = (double)audioFile->getLengthInSamples(); double underlyingSampleRate = audioFile->getSampleRateAsDouble(); @@ -111,30 +112,30 @@ class ResampledReadableAudioFile length -= (std::round(resampler.getOutputLatency()) - resampler.getOutputLatency()); } - return (long)length; + return (long long)length; } - double getDuration() const { + double getDuration() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->getDuration(); } - long getNumChannels() const { + long getNumChannels() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->getNumChannels(); } - bool exactDurationKnown() const { + bool exactDurationKnown() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->exactDurationKnown(); } - std::string getFileFormat() const { + std::string getFileFormat() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->getFileFormat(); } - std::string getFileDatatype() const { + std::string getFileDatatype() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->getFileDatatype(); } @@ -145,7 +146,8 @@ class ResampledReadableAudioFile return resampler.getQuality(); } - py::array_t read(std::variant numSamplesVariant) { + py::array_t + read(std::variant numSamplesVariant) override { long long numSamples = parseNumSamples(numSamplesVariant); if (numSamples == 0) throw std::domain_error( @@ -247,11 +249,27 @@ class ResampledReadableAudioFile std::optional> resamplerInput; if (inputSamplesRequired > 0) { - // Read from the underlying audioFile into our contiguous buffer, - // which causes 
the sourceSamples AudioBuffer to be filled: - long long samplesRead = audioFile->readInternal( - audioFile->getNumChannels(), inputSamplesRequired, - contiguousSourceSampleBuffer.data()); + // Read from the underlying audioFile using the public read() interface. + // We need to acquire the GIL since read() creates Python objects. + long long samplesRead = 0; + { + py::gil_scoped_acquire acquire; + py::array_t readResult = audioFile->read(inputSamplesRequired); + py::buffer_info bufInfo = readResult.request(); + samplesRead = bufInfo.shape[1]; // shape is (channels, samples) + + // Copy data from the numpy array to our contiguous buffer. + // Use samplesRead for the stride (not inputSamplesRequired) to match + // the layout expected by the pointer update below when the read is + // shorter than requested. + float *srcPtr = static_cast(bufInfo.ptr); + for (int c = 0; c < audioFile->getNumChannels(); c++) { + for (long long i = 0; i < samplesRead; i++) { + contiguousSourceSampleBuffer[c * samplesRead + i] = + srcPtr[c * samplesRead + i]; + } + } + } // Resize the sourceSamples buffer to the number of samples read, // without reallocating the memory underneath @@ -330,66 +348,67 @@ class ResampledReadableAudioFile return resampledBuffer; } - void seek(long long targetPosition) { + void seek(long long targetPosition) override { py::gil_scoped_release release; - { - ScopedTryWriteLock scopedTryWriteLock(objectLock); - if (!scopedTryWriteLock.isLocked()) { - throw std::runtime_error( - "Another thread is currently reading from this AudioFile. 
Note " - "that using multiple concurrent readers on the same AudioFile " - "object will produce nondeterministic results."); - } - long long positionToSeekToIncludingBuffers = targetPosition; + seekInternal(targetPosition); + PythonException::raise(); + } + + void seekInternal(long long targetPosition) override { + ScopedTryWriteLock scopedTryWriteLock(objectLock); + if (!scopedTryWriteLock.isLocked()) { + throw std::runtime_error( + "Another thread is currently reading from this AudioFile. Note " + "that using multiple concurrent readers on the same AudioFile " + "object will produce nondeterministic results."); + } + long long positionToSeekToIncludingBuffers = targetPosition; - long long targetPositionInSourceSampleRate = - std::max(0LL, (long long)(((double)positionToSeekToIncludingBuffers * - resampler.getSourceSampleRate()) / - resampler.getTargetSampleRate())); + long long targetPositionInSourceSampleRate = + std::max(0LL, (long long)(((double)positionToSeekToIncludingBuffers * + resampler.getSourceSampleRate()) / + resampler.getTargetSampleRate())); - targetPositionInSourceSampleRate -= - inputBufferSizeFor(resampler.getQuality()); + targetPositionInSourceSampleRate -= + inputBufferSizeFor(resampler.getQuality()); - long long maximumOverflow = (long long)std::ceil( - resampler.getSourceSampleRate() / resampler.getTargetSampleRate()); - targetPositionInSourceSampleRate -= std::max(0LL, maximumOverflow); + long long maximumOverflow = (long long)std::ceil( + resampler.getSourceSampleRate() / resampler.getTargetSampleRate()); + targetPositionInSourceSampleRate -= std::max(0LL, maximumOverflow); - double floatingPositionInTargetSampleRate = - std::max(0.0, ((double)targetPositionInSourceSampleRate * - resampler.getTargetSampleRate()) / - resampler.getSourceSampleRate()); + double floatingPositionInTargetSampleRate = + std::max(0.0, ((double)targetPositionInSourceSampleRate * + resampler.getTargetSampleRate()) / + resampler.getSourceSampleRate()); - 
positionInTargetSampleRate = - (long long)(floatingPositionInTargetSampleRate); + positionInTargetSampleRate = + (long long)(floatingPositionInTargetSampleRate); - resampler.reset(); + resampler.reset(); - long long inputSamplesUsed = - resampler.advanceResamplerState(positionInTargetSampleRate); - targetPositionInSourceSampleRate = inputSamplesUsed; + long long inputSamplesUsed = + resampler.advanceResamplerState(positionInTargetSampleRate); + targetPositionInSourceSampleRate = inputSamplesUsed; - audioFile->seekInternal(std::max(0LL, targetPositionInSourceSampleRate)); + audioFile->seekInternal(std::max(0LL, targetPositionInSourceSampleRate)); - outputBuffer.setSize(0, 0); + outputBuffer.setSize(0, 0); - const long long chunkSize = 1024 * 1024; - for (long long i = positionInTargetSampleRate; i < targetPosition; - i += chunkSize) { - long long numSamples = std::min(chunkSize, targetPosition - i); - this->readInternal(numSamples); - } + const long long chunkSize = 1024 * 1024; + for (long long i = positionInTargetSampleRate; i < targetPosition; + i += chunkSize) { + long long numSamples = std::min(chunkSize, targetPosition - i); + this->readInternal(numSamples); } - - PythonException::raise(); } - long long tell() const { + long long tell() const override { py::gil_scoped_release release; const juce::ScopedReadLock scopedReadLock(objectLock); return positionInTargetSampleRate; } - void close() { + void close() override { py::gil_scoped_release release; ScopedTryWriteLock scopedTryWriteLock(objectLock); if (!scopedTryWriteLock.isLocked()) { @@ -400,7 +419,7 @@ class ResampledReadableAudioFile _isClosed = true; } - bool isClosed() const { + bool isClosed() const override { // No need for a ScopedReadLock here, as audioFile is const: if (audioFile->isClosed()) return true; @@ -411,27 +430,27 @@ class ResampledReadableAudioFile return _isClosed; } - bool isSeekable() const { + bool isSeekable() const override { // No need for a ScopedReadLock here, as audioFile is 
const: return audioFile->isSeekable(); } - std::optional getFilename() const { + std::optional getFilename() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->getFilename(); } - PythonInputStream *getPythonInputStream() const { + PythonInputStream *getPythonInputStream() const override { // No need for a ScopedReadLock here, as audioFile is const: return audioFile->getPythonInputStream(); } - std::shared_ptr enter() { + std::shared_ptr enter() override { return shared_from_this(); } void exit(const py::object &type, const py::object &value, - const py::object &traceback) { + const py::object &traceback) override { bool shouldThrow = PythonException::isPending(); close(); @@ -439,8 +458,12 @@ class ResampledReadableAudioFile throw py::error_already_set(); } + std::string getClassName() const override { + return "ResampledReadableAudioFile"; + } + private: - const std::shared_ptr audioFile; + const std::shared_ptr audioFile; StreamResampler resampler; juce::AudioBuffer outputBuffer; long long positionInTargetSampleRate = 0; @@ -448,10 +471,10 @@ class ResampledReadableAudioFile bool _isClosed = false; }; -inline py::class_> declare_resampled_readable_audio_file(py::module &m) { - return py::class_>( m, "ResampledReadableAudioFile", R"( @@ -485,12 +508,15 @@ reads, seeking through files, and using a constant amount of memory. } inline void init_resampled_readable_audio_file( - py::module &m, py::class_> - &pyResampledReadableAudioFile) { + py::module &m, + py::class_> + &pyResampledReadableAudioFile) { + // Note: Most methods are inherited from AbstractReadableAudioFile. + // We only define class-specific methods and override docstrings where needed. pyResampledReadableAudioFile .def(py::init( - [](std::shared_ptr audioFile, + [](std::shared_ptr audioFile, float targetSampleRate, ResamplingQuality quality) -> ResampledReadableAudioFile * { // This definition is only here to provide nice docstrings. 
@@ -502,163 +528,14 @@ inline void init_resampled_readable_audio_file( py::arg("resampling_quality") = ResamplingQuality::WindowedSinc32) .def_static( "__new__", - [](const py::object *, std::shared_ptr audioFile, + [](const py::object *, + std::shared_ptr audioFile, float targetSampleRate, ResamplingQuality quality) { return std::make_shared( audioFile, targetSampleRate, quality); }, py::arg("cls"), py::arg("audio_file"), py::arg("target_sample_rate"), py::arg("resampling_quality") = ResamplingQuality::WindowedSinc32) - .def("read", &ResampledReadableAudioFile::read, py::arg("num_frames") = 0, - R"( -Read the given number of frames (samples in each channel, at the target sample rate) -from this audio file at its current position, automatically resampling on-the-fly to -``target_sample_rate``. - -``num_frames`` is a required argument, as audio files can be deceptively large. (Consider that -an hour-long ``.ogg`` file may be only a handful of megabytes on disk, but may decompress to -nearly a gigabyte in memory.) Audio files should be read in chunks, rather than all at once, to avoid -hard-to-debug memory problems and out-of-memory crashes. - -Audio samples are returned as a multi-dimensional :class:`numpy.array` with the shape -``(channels, samples)``; i.e.: a stereo audio file will have shape ``(2, )``. -Returned data is always in the ``float32`` datatype. - -If the file does not contain enough audio data to fill ``num_frames``, the returned -:class:`numpy.array` will contain as many frames as could be read from the file. (In some cases, -passing :py:attr:`frames` as ``num_frames`` may still return less data than expected. See documentation -for :py:attr:`frames` and :py:attr:`exact_duration_known` for more information about situations -in which this may occur.) - -For most (but not all) audio files, the minimum possible sample value will be ``-1.0f`` and the -maximum sample value will be ``+1.0f``. - -.. 
note:: - For convenience, the ``num_frames`` argument may be a floating-point number. However, if the - provided number of frames contains a fractional part (i.e.: ``1.01`` instead of ``1.00``) then - an exception will be thrown, as a fractional number of samples cannot be returned. -)") - .def("seekable", &ResampledReadableAudioFile::isSeekable, - "Returns True if this file is currently open and calls to seek() " - "will work.") - .def("seek", &ResampledReadableAudioFile::seek, py::arg("position"), - "Seek this file to the provided location in frames at the target " - "sample rate. Future reads will start from this position.\n\n.. " - "note::\n Prior to version 0.7.3, this method operated in linear " - "time with respect to the seek position (i.e.: the file was seeked " - "to its beginning and pushed through the resampler) to ensure that " - "the resampled audio output was sample-accurate. This was optimized " - "in version 0.7.3 to operate in effectively constant time while " - "retaining sample-accuracy.") - .def("tell", &ResampledReadableAudioFile::tell, - "Return the current position of the read pointer in this audio " - "file, in frames at the target sample rate. This value will " - "increase as :meth:`read` is " - "called, and may decrease if :meth:`seek` is called.") - .def("close", &ResampledReadableAudioFile::close, - "Close this file, rendering this object unusable. 
Note that the " - ":class:`ReadableAudioFile` instance that is wrapped by this object " - "will not be closed, and will remain usable.") - .def("__enter__", &ResampledReadableAudioFile::enter, - "Use this :class:`ResampledReadableAudioFile` as a context manager, " - "automatically closing the file and releasing resources when the " - "context manager exits.") - .def("__exit__", &ResampledReadableAudioFile::exit, - "Stop using this :class:`ResampledReadableAudioFile` as a context " - "manager, close the file, release its resources.") - .def("__repr__", - [](const ResampledReadableAudioFile &file) { - std::ostringstream ss; - ss << "empty()) { - ss << " filename=\"" << *file.getFilename() << "\""; - } else if (PythonInputStream *stream = - file.getPythonInputStream()) { - ss << " file_like=" << stream->getRepresentation(); - } - - if (file.isClosed()) { - ss << " closed"; - } else { - ss << " samplerate=" << file.getSampleRateAsDouble(); - ss << " num_channels=" << file.getNumChannels(); - ss << " frames=" << file.getLengthInSamples(); - ss << " file_dtype=" << file.getFileDatatype(); - } - ss << " at " << &file; - ss << ">"; - return ss.str(); - }) - .def_property_readonly( - "name", &ResampledReadableAudioFile::getFilename, - "The name of this file.\n\nIf the " - ":class:`ReadableAudioFile` wrapped by this " - ":class:`ResampledReadableAudioFile` was " - "opened from a file-like object, this will be ``None``.") - .def_property_readonly( - "closed", &ResampledReadableAudioFile::isClosed, - "True iff either this file or its wrapped :class:`ReadableAudioFile` " - "instance are closed (and no longer usable), False otherwise.") - .def_property_readonly( - "samplerate", &ResampledReadableAudioFile::getSampleRate, - "The sample rate of this file in samples (per channel) per second " - "(Hz). This will be equal to the ``target_sample_rate`` parameter " - "passed when this object was created. 
Sample rates are represented " - "as floating-point numbers by default, but this property will be an " - "integer if the file's target sample rate has no fractional part.") - .def_property_readonly("num_channels", - &ResampledReadableAudioFile::getNumChannels, - "The number of channels in this file.") - .def_property_readonly("exact_duration_known", - &ResampledReadableAudioFile::exactDurationKnown, - R"( -Returns :py:const:`True` if this file's :py:attr:`frames` and -:py:attr:`duration` attributes are exact values, or :py:const:`False` if the -:py:attr:`frames` and :py:attr:`duration` attributes are estimates based -on the file's size and bitrate. - -:py:attr:`exact_duration_known` will change from :py:const:`False` to -:py:const:`True` as the file is read to completion. Once :py:const:`True`, -this value will not change back to :py:const:`False` for the same -:py:class:`AudioFile` object (even after calls to :meth:`seek`). - -.. note:: - :py:attr:`exact_duration_known` will only ever be :py:const:`False` - when reading certain MP3 files. For files in other formats than MP3, - :py:attr:`exact_duration_known` will always be equal to :py:const:`True`. - -*Introduced in v0.7.2.* -)") - .def_property_readonly( - "frames", &ResampledReadableAudioFile::getLengthInSamples, - "The total number of frames (samples per " - "channel) in this file, at the target sample rate.\n\nFor example, " - "if this file contains 10 seconds of stereo audio at sample " - "rate of 44,100 Hz, and ``target_sample_rate`` is 22,050 Hz, " - "``frames`` will return ``22,050``.\n\nNote that different " - "``resampling_quality`` values used for resampling may cause " - "``frames`` to differ by ± 1 from its expected value.\n\n.. " - "warning::\n When reading certain MP3 files, the " - ":py:attr:`frames` and :py:attr:`duration` properties may " - "initially be estimates and **may change as the file is read**. 
" - "See the documentation for :py:attr:`.ReadableAudioFile.frames` " - "for more details.") - .def_property_readonly( - "duration", &ResampledReadableAudioFile::getDuration, - "The duration of this file in seconds (``frames`` " - "divided by ``samplerate``).\n\n.. " - "warning::\n When reading certain MP3 files, the " - ":py:attr:`frames` and :py:attr:`duration` properties may " - "initially be estimates and **may change as the file is read**. " - "See the documentation for :py:attr:`.ReadableAudioFile.frames` " - "for more details.") - .def_property_readonly( - "file_dtype", &ResampledReadableAudioFile::getFileDatatype, - "The data type (``\"int16\"``, ``\"float32\"``, etc) stored " - "natively by this file.\n\nNote that :meth:`read` will always " - "return a ``float32`` array, regardless of the value of this " - "property.") .def_property_readonly( "resampling_quality", &ResampledReadableAudioFile::getQuality, "The resampling algorithm used to resample from the original file's " diff --git a/pedalboard/python_bindings.cpp b/pedalboard/python_bindings.cpp index fd83cf059..28a5827fd 100644 --- a/pedalboard/python_bindings.cpp +++ b/pedalboard/python_bindings.cpp @@ -65,6 +65,7 @@ namespace py = pybind11; #include "io/AudioFileInit.h" #include "io/AudioStream.h" +#include "io/ChannelConvertedReadableAudioFile.h" #include "io/ReadableAudioFile.h" #include "io/ResampledReadableAudioFile.h" #include "io/StreamResampler.h" @@ -249,13 +250,20 @@ If the number of samples and the number of channels are the same, each "writing audio files or streams.\n\n*Introduced in v0.5.1.*"; auto pyAudioFile = declare_audio_file(io); + auto pyAbstractReadableAudioFile = declare_ireadable_audio_file(io); auto pyReadableAudioFile = declare_readable_audio_file(io); + auto pyChannelConvertedReadableAudioFile = + declare_readable_audio_file_with_channel_conversion(io); auto pyResampledReadableAudioFile = declare_resampled_readable_audio_file(io); auto pyWriteableAudioFile = 
declare_writeable_audio_file(io); init_audio_file(pyAudioFile); + init_ireadable_audio_file(pyAbstractReadableAudioFile); init_readable_audio_file(io, pyReadableAudioFile); + init_readable_audio_file_with_channel_conversion( + io, pyChannelConvertedReadableAudioFile); init_resampled_readable_audio_file(io, pyResampledReadableAudioFile); + init_abstract_readable_audio_file_methods(pyAbstractReadableAudioFile); init_writeable_audio_file(io, pyWriteableAudioFile); init_stream_resampler(io); diff --git a/pedalboard_native/io/__init__.pyi b/pedalboard_native/io/__init__.pyi index 4ad836986..c46cf3ee6 100644 --- a/pedalboard_native/io/__init__.pyi +++ b/pedalboard_native/io/__init__.pyi @@ -18,6 +18,7 @@ _Shape = typing.Tuple[int, ...] __all__ = [ "AudioFile", "AudioStream", + "ChannelConvertedReadableAudioFile", "ReadableAudioFile", "ResampledReadableAudioFile", "StreamResampler", @@ -554,6 +555,45 @@ class ReadableAudioFile(AudioFile): *Introduced in v0.6.0.* """ + def with_channels( + self, num_channels: int + ) -> typing.Union[ReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert the channel count of this audio file to the provided `num_channels`. + + If `num_channels` matches the existing channel count of the file, the original file will be returned. + + When converting from stereo (or multi-channel) to mono, all channels are averaged together with equal weighting. When converting from mono to stereo (or multi-channel), the mono signal is duplicated to all output channels. + + *Introduced in v0.9.17.* + """ + + def mono( + self, + ) -> typing.Union[ReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert this audio file to mono (1 channel). + + If this file is already mono, the original file will be returned. 
+ + When converting from stereo (or multi-channel) to mono, all channels are averaged together with equal weighting. + + *Introduced in v0.9.17.* + """ + + def stereo( + self, + ) -> typing.Union[ReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert this audio file to stereo (2 channels). + + If this file is already stereo, the original file will be returned. + + When converting from mono to stereo, the mono signal is duplicated to both channels. When converting from multi-channel (3 or more channels) to stereo, only the first two channels are kept. + + *Introduced in v0.9.17.* + """ + def seek(self, position: int) -> None: """ Seek this file to the provided location in frames. Future reads will start from this position. @@ -694,6 +734,265 @@ class ReadableAudioFile(AudioFile): The sample rate of this file in samples (per channel) per second (Hz). Sample rates are represented as floating-point numbers by default, but this property will be an integer if the file's sample rate has no fractional part. + """ + pass + +class ChannelConvertedReadableAudioFile(AudioFile): + """ + A class that wraps an audio file for reading, while converting + the audio stream on-the-fly to a new channel count. + + *Introduced in v0.9.22.* + + Reading, seeking, and all other basic file I/O operations are supported (except for + :meth:`read_raw`). 
+ + :class:`ChannelConvertedReadableAudioFile` should usually + be used via the :meth:`with_channels` method on :class:`ReadableAudioFile` + or :class:`ResampledReadableAudioFile`: + + :: + + with AudioFile("my_stereo_file.mp3").mono() as f: + f.num_channels # => 1 + mono_audio = f.read(int(f.samplerate * 10)) + + with AudioFile("my_mono_file.wav").stereo() as f: + f.num_channels # => 2 + stereo_audio = f.read(int(f.samplerate * 10)) + + with AudioFile("my_file.wav").with_channels(6) as f: + f.num_channels # => 6 + surround_audio = f.read(int(f.samplerate * 10)) + + When converting from stereo (or multi-channel) to mono, all channels are + averaged together with equal weighting. When converting from mono to + stereo (or multi-channel), the mono signal is duplicated to all output + channels. Other conversions (stereo to multi-channel, multi-channel to + stereo, etc) are not currently supported. + """ + + def __enter__(self) -> ChannelConvertedReadableAudioFile: + """ + Use this :class:`ChannelConvertedReadableAudioFile` as a context manager, automatically closing the file and releasing resources when the context manager exits. + """ + + def __exit__(self, arg0: object, arg1: object, arg2: object) -> None: + """ + Stop using this :class:`ChannelConvertedReadableAudioFile` as a context manager, close the file, release its resources. + """ + + def __init__( + self, + audio_file: ReadableAudioFile, + num_channels: int, + ) -> None: ... + @classmethod + def __new__( + cls, + audio_file: ReadableAudioFile, + num_channels: int, + ) -> ChannelConvertedReadableAudioFile: ... + def __repr__(self) -> str: ... + def close(self) -> None: + """ + Close this file, rendering this object unusable. Note that the :class:`ReadableAudioFile` instance that is wrapped by this object will not be closed, and will remain usable. 
+ """ + + def resampled_to( + self, + target_sample_rate: float, + quality: pedalboard_native.Resample.Quality = pedalboard_native.Resample.Quality.WindowedSinc32, + ) -> typing.Union[ChannelConvertedReadableAudioFile, ResampledReadableAudioFile]: + """ + Return a :class:`ResampledReadableAudioFile` that will automatically resample this audio file to the provided `target_sample_rate`, using a constant amount of memory. + + If `target_sample_rate` matches the existing sample rate of the file, the original file will be returned. + + *Introduced in v0.6.0.* + """ + + def with_channels( + self, num_channels: int + ) -> typing.Union[ChannelConvertedReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert the channel count of this audio file to the provided `num_channels`. + + If `num_channels` matches the existing channel count of the file, the original file will be returned. + + When converting from stereo (or multi-channel) to mono, all channels are averaged together with equal weighting. When converting from mono to stereo (or multi-channel), the mono signal is duplicated to all output channels. + + *Introduced in v0.9.17.* + """ + + def mono( + self, + ) -> typing.Union[ChannelConvertedReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert this audio file to mono (1 channel). + + If this file is already mono, the original file will be returned. + + When converting from stereo (or multi-channel) to mono, all channels are averaged together with equal weighting. + + *Introduced in v0.9.17.* + """ + + def stereo( + self, + ) -> typing.Union[ChannelConvertedReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert this audio file to stereo (2 channels). 
+ + If this file is already stereo, the original file will be returned. + + When converting from mono to stereo, the mono signal is duplicated to both channels. Converting from multi-channel (3 or more channels) directly to stereo is not currently supported; convert to mono first (i.e.: ``.mono().stereo()``). + + *Introduced in v0.9.17.* + """ + + def read( + self, num_frames: typing.Union[float, int] = 0 + ) -> NDArray[float32]: + """ + Read the given number of frames (samples in each channel) from this audio file at its current position, automatically converting channel counts on-the-fly. + + ``num_frames`` is a required argument, as audio files can be deceptively large. (Consider that + an hour-long ``.ogg`` file may be only a handful of megabytes on disk, but may decompress to + nearly a gigabyte in memory.) Audio files should be read in chunks, rather than all at once, to avoid + hard-to-debug memory problems and out-of-memory crashes. + + Audio samples are returned as a multi-dimensional :class:`numpy.array` with the shape + ``(channels, samples)``; i.e.: a stereo audio file will have shape ``(2, )``. + Returned data is always in the ``float32`` datatype. + + If the file does not contain enough audio data to fill ``num_frames``, the returned + :class:`numpy.array` will contain as many frames as could be read from the file. (In some cases, + passing :py:attr:`frames` as ``num_frames`` may still return less data than expected. See documentation + for :py:attr:`frames` and :py:attr:`exact_duration_known` for more information about situations + in which this may occur.) + + For most (but not all) audio files, the minimum possible sample value will be ``-1.0f`` and the + maximum sample value will be ``+1.0f``. + + .. note:: + For convenience, the ``num_frames`` argument may be a floating-point number. However, if the + provided number of frames contains a fractional part (i.e.: ``1.01`` instead of ``1.00``) then + an exception will be thrown, as a fractional number of samples cannot be returned.
+ """ + + def seek(self, position: int) -> None: + """ + Seek this file to the provided location in frames. Future reads will start from this position. + """ + + def seekable(self) -> bool: + """ + Returns True if this file is currently open and calls to seek() will work. + """ + + def tell(self) -> int: + """ + Return the current position of the read pointer in this audio file, in frames. This value will increase as :meth:`read` is called, and may decrease if :meth:`seek` is called. + """ + + @property + def closed(self) -> bool: + """ + True iff either this file or its wrapped :class:`ReadableAudioFile` instance are closed (and no longer usable), False otherwise. + + + """ + + @property + def duration(self) -> float: + """ + The duration of this file in seconds (``frames`` divided by ``samplerate``). + + .. warning:: + :py:attr:`duration` may be an overestimate for certain MP3 files. + Use :py:attr:`exact_duration_known` property to determine if + :py:attr:`duration` is accurate. (See the documentation for the + :py:attr:`frames` attribute for more details.) + + + """ + + @property + def exact_duration_known(self) -> bool: + """ + Returns :py:const:`True` if this file's :py:attr:`frames` and + :py:attr:`duration` attributes are exact values, or :py:const:`False` if the + :py:attr:`frames` and :py:attr:`duration` attributes are estimates based + on the file's size and bitrate. + + :py:attr:`exact_duration_known` will change from :py:const:`False` to + :py:const:`True` as the file is read to completion. Once :py:const:`True`, + this value will not change back to :py:const:`False` for the same + :py:class:`AudioFile` object (even after calls to :meth:`seek`). + + .. note:: + :py:attr:`exact_duration_known` will only ever be :py:const:`False` + when reading certain MP3 files. For files in other formats than MP3, + :py:attr:`exact_duration_known` will always be equal to :py:const:`True`. 
+ + *Introduced in v0.7.2.* + + + """ + + @property + def file_dtype(self) -> str: + """ + The data type (``"int16"``, ``"float32"``, etc) stored natively by this file. + + Note that :meth:`read` will always return a ``float32`` array, regardless of the value of this property. + + + """ + + @property + def frames(self) -> int: + """ + The total number of frames (samples per channel) in this file. + + For example, if this file contains 10 seconds of stereo audio at sample rate + of 44,100 Hz, ``frames`` will return ``441,000``. + + .. warning:: + When reading certain MP3 files that have been encoded in constant bitrate mode, + the :py:attr:`frames` and :py:attr:`duration` properties may initially be estimates + and **may change as the file is read**. The :py:attr:`exact_duration_known` + property indicates if the values of :py:attr:`frames` and :py:attr:`duration` + are estimates or exact values. + + + """ + + @property + def name(self) -> typing.Optional[str]: + """ + The name of this file. + + If the :class:`ReadableAudioFile` wrapped by this :class:`ChannelConvertedReadableAudioFile` was opened from a file-like object, this will be ``None``. + + + """ + + @property + def num_channels(self) -> int: + """ + The number of channels in this file. + + + """ + + @property + def samplerate(self) -> typing.Union[float, int]: + """ + The sample rate of this file in samples (per channel) per second (Hz). Sample rates are represented as floating-point numbers by default, but this property will be an integer if the file's sample rate has no fractional part. + + """ pass @@ -756,6 +1055,58 @@ class ResampledReadableAudioFile(AudioFile): Close this file, rendering this object unusable. Note that the :class:`ReadableAudioFile` instance that is wrapped by this object will not be closed, and will remain usable. 
""" + def resampled_to( + self, + target_sample_rate: float, + quality: pedalboard_native.Resample.Quality = pedalboard_native.Resample.Quality.WindowedSinc32, + ) -> typing.Union[ResampledReadableAudioFile, ResampledReadableAudioFile]: + """ + Return a :class:`ResampledReadableAudioFile` that will automatically resample this audio file to the provided `target_sample_rate`, using a constant amount of memory. + + If `target_sample_rate` matches the existing sample rate of the file, the original file will be returned. + + *Introduced in v0.6.0.* + """ + + def with_channels( + self, num_channels: int + ) -> typing.Union[ResampledReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert the channel count of this audio file to the provided `num_channels`. + + If `num_channels` matches the existing channel count of the file, the original file will be returned. + + When converting from stereo (or multi-channel) to mono, all channels are averaged together with equal weighting. When converting from mono to stereo (or multi-channel), the mono signal is duplicated to all output channels. + + *Introduced in v0.9.17.* + """ + + def mono( + self, + ) -> typing.Union[ResampledReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert this audio file to mono (1 channel). + + If this file is already mono, the original file will be returned. + + When converting from stereo (or multi-channel) to mono, all channels are averaged together with equal weighting. + + *Introduced in v0.9.17.* + """ + + def stereo( + self, + ) -> typing.Union[ResampledReadableAudioFile, ChannelConvertedReadableAudioFile]: + """ + Return a :class:`ChannelConvertedReadableAudioFile` that will automatically convert this audio file to stereo (2 channels). + + If this file is already stereo, the original file will be returned. 
+ + When converting from mono to stereo, the mono signal is duplicated to both channels. Converting from multi-channel (3 or more channels) directly to stereo is not currently supported; convert to mono first (i.e.: ``.mono().stereo()``). + + *Introduced in v0.9.17.* + """ + def read( self, num_frames: typing.Union[float, int] = 0 ) -> NDArray[float32]: diff --git a/tests/test_channel_converted_io.py b/tests/test_channel_converted_io.py new file mode 100644 index 000000000..85b8de6a1 --- /dev/null +++ b/tests/test_channel_converted_io.py @@ -0,0 +1,414 @@ +#! /usr/bin/env python +# +# Copyright 2022 Spotify AB +# +# Licensed under the GNU Public License, Version 3.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.gnu.org/licenses/gpl-3.0.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +from io import BytesIO + +import numpy as np +import pytest + +from pedalboard.io import AudioFile, ChannelConvertedReadableAudioFile + +from .utils import generate_sine_at + + +def test_mono_constructor(): + """Test that .mono() returns a ChannelConvertedReadableAudioFile.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())) as f: + with f.mono() as m: + assert isinstance(m, ChannelConvertedReadableAudioFile) + assert m.num_channels == 1 + assert m.closed + assert not f.closed + assert f.closed + + +def test_stereo_constructor(): + """Test that .stereo() returns a ChannelConvertedReadableAudioFile.""" + mono = np.random.rand(1, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 1, bit_depth=32) as f: + f.write(mono) + + with AudioFile(BytesIO(buf.getvalue())) as f: + with f.stereo() as s: + assert isinstance(s, ChannelConvertedReadableAudioFile) + assert s.num_channels == 2 + assert s.closed + assert not f.closed + assert f.closed + + +def test_with_channels_constructor(): + """Test that .with_channels() returns a ChannelConvertedReadableAudioFile.""" + mono = np.random.rand(1, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 1, bit_depth=32) as f: + f.write(mono) + + with AudioFile(BytesIO(buf.getvalue())) as f: + with f.with_channels(4) as c: + assert isinstance(c, ChannelConvertedReadableAudioFile) + assert c.num_channels == 4 + assert c.closed + assert not f.closed + assert f.closed + + +def test_mono_does_nothing_if_already_mono(): + """Test that .mono() returns self if already mono.""" + mono = np.random.rand(1, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 1, bit_depth=32) as f: + f.write(mono) + + with 
AudioFile(BytesIO(buf.getvalue())) as f: + with f.mono() as m: + assert m is f + + +def test_stereo_does_nothing_if_already_stereo(): + """Test that .stereo() returns self if already stereo.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())) as f: + with f.stereo() as s: + assert s is f + + +def test_with_channels_does_nothing_if_same(): + """Test that .with_channels(n) returns self if already n channels.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())) as f: + with f.with_channels(2) as c: + assert c is f + + +def test_read_zero_raises(): + """Test that read() without arguments raises ValueError.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + with pytest.raises(ValueError): + f.read() + + +@pytest.mark.parametrize("source_channels", [2, 4, 6, 8]) +def test_stereo_to_mono_averages_channels(source_channels: int): + """Test that converting to mono averages all channels.""" + # Create audio where each channel has a different constant value + num_samples = 44100 + audio = np.zeros((source_channels, num_samples), dtype=np.float32) + for c in range(source_channels): + audio[c, :] = float(c + 1) / source_channels # 0.5, 1.0 for stereo + + expected_mono_value = np.mean([float(c + 1) / source_channels for c in range(source_channels)]) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, source_channels, bit_depth=32) as f: + f.write(audio) + + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + mono = f.read(f.frames) + 
assert mono.shape == (1, num_samples) + np.testing.assert_allclose(mono[0, 1000], expected_mono_value, rtol=1e-5) + + +def test_mono_to_stereo_duplicates(): + """Test that converting mono to stereo duplicates the channel.""" + mono_value = 0.5 + mono = np.full((1, 44100), mono_value, dtype=np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 1, bit_depth=32) as f: + f.write(mono) + + with AudioFile(BytesIO(buf.getvalue())).stereo() as f: + stereo = f.read(f.frames) + assert stereo.shape == (2, 44100) + np.testing.assert_allclose(stereo[0, :], mono_value, rtol=1e-5) + np.testing.assert_allclose(stereo[1, :], mono_value, rtol=1e-5) + + +@pytest.mark.parametrize("target_channels", [2, 4, 6, 8]) +def test_mono_to_multichannel_duplicates(target_channels: int): + """Test that converting mono to multiple channels duplicates to all.""" + mono_value = 0.75 + mono = np.full((1, 44100), mono_value, dtype=np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 1, bit_depth=32) as f: + f.write(mono) + + with AudioFile(BytesIO(buf.getvalue())).with_channels(target_channels) as f: + audio = f.read(f.frames) + assert audio.shape == (target_channels, 44100) + for c in range(target_channels): + np.testing.assert_allclose(audio[c, :], mono_value, rtol=1e-5) + + +def test_stereo_to_multichannel_raises(): + """Test that converting stereo to multichannel raises an error.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with pytest.raises(ValueError, match="not supported"): + AudioFile(BytesIO(buf.getvalue())).with_channels(6) + + +def test_multichannel_to_stereo_raises(): + """Test that converting multichannel to stereo raises an error.""" + surround = np.random.rand(6, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 6, 
bit_depth=32) as f: + f.write(surround) + + with pytest.raises(ValueError, match="not supported"): + AudioFile(BytesIO(buf.getvalue())).stereo() + + +def test_invalid_channel_count_raises(): + """Test that requesting 0 or negative channels raises an error.""" + mono = np.random.rand(1, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 1, bit_depth=32) as f: + f.write(mono) + + with pytest.raises(ValueError): + AudioFile(BytesIO(buf.getvalue())).with_channels(0) + + with pytest.raises(ValueError): + AudioFile(BytesIO(buf.getvalue())).with_channels(-1) + + +@pytest.mark.parametrize("chunk_size", [100, 1000, 10000]) +def test_tell_after_read(chunk_size: int): + """Test that tell() returns correct position after reads.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + for i in range(0, f.frames, chunk_size): + assert f.tell() == i + if f.read(chunk_size).shape[-1] < chunk_size: + break + + +@pytest.mark.parametrize("offset", [0, 100, 1000, 22050]) +def test_seek(offset: int): + """Test that seek() works correctly.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + # Read from start to offset position + if offset > 0: + f.read(offset) + expected = f.read(1000) + + # Seek back and verify + f.seek(offset) + assert f.tell() == offset + actual = f.read(1000) + + np.testing.assert_allclose(expected, actual) + + +def test_properties_accessible(): + """Test that all standard properties are accessible.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + 
f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + assert f.samplerate == 44100 + assert f.num_channels == 1 + assert f.frames == 44100 + assert f.duration == 1.0 + assert f.exact_duration_known is True + assert f.file_dtype == "float32" + assert f.closed is False + assert f.seekable() is True + + assert f.closed is True + + +def test_repr(): + """Test that __repr__ returns expected format.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + repr_str = repr(f) + assert "ChannelConvertedReadableAudioFile" in repr_str + assert "samplerate=44100" in repr_str + assert "num_channels=1" in repr_str + + +# Chaining tests + + +def test_chain_resampled_to_then_mono(): + """Test chaining .resampled_to().mono().""" + stereo = generate_sine_at(44100, 440, num_seconds=1, num_channels=2).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).resampled_to(22050).mono() as f: + assert f.samplerate == 22050 + assert f.num_channels == 1 + audio = f.read(f.frames) + assert audio.shape[0] == 1 + # Allow for some variation due to resampling + assert abs(audio.shape[1] - 22050) <= 10 + + +def test_chain_mono_then_resampled_to(): + """Test chaining .mono().resampled_to().""" + stereo = generate_sine_at(44100, 440, num_seconds=1, num_channels=2).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono().resampled_to(22050) as f: + assert f.samplerate == 22050 + assert f.num_channels == 1 + audio = f.read(f.frames) + assert audio.shape[0] == 1 + # Allow for some variation due to resampling + assert abs(audio.shape[1] - 22050) <= 10 + 
+ +def test_chain_mono_resampled_stereo(): + """Test triple chaining: .mono().resampled_to().stereo().""" + stereo = generate_sine_at(44100, 440, num_seconds=1, num_channels=2).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + with AudioFile(BytesIO(buf.getvalue())).mono().resampled_to(22050).stereo() as f: + assert f.samplerate == 22050 + assert f.num_channels == 2 + audio = f.read(f.frames) + assert audio.shape[0] == 2 + # Both channels should be identical (duplicated from mono) + np.testing.assert_allclose(audio[0], audio[1]) + + +def test_multichannel_to_stereo_via_mono(): + """Test the recommended workaround: multichannel -> mono -> stereo.""" + surround = np.random.rand(6, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 6, bit_depth=32) as f: + f.write(surround) + + # Direct conversion should fail + with pytest.raises(ValueError): + AudioFile(BytesIO(buf.getvalue())).stereo() + + # But via mono should work + with AudioFile(BytesIO(buf.getvalue())).mono().stereo() as f: + assert f.num_channels == 2 + audio = f.read(f.frames) + assert audio.shape == (2, 44100) + + +@pytest.mark.parametrize("chunk_size", [100, 1000, 10000, 44100]) +def test_read_in_chunks(chunk_size: int): + """Test reading in chunks produces same result as reading all at once.""" + stereo = np.random.rand(2, 44100).astype(np.float32) + + buf = BytesIO() + buf.name = "test.wav" + with AudioFile(buf, "w", 44100, 2, bit_depth=32) as f: + f.write(stereo) + + # Read all at once + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + all_at_once = f.read(f.frames) + + # Read in chunks + with AudioFile(BytesIO(buf.getvalue())).mono() as f: + chunks = [] + while f.tell() < f.frames: + chunk = f.read(chunk_size) + chunks.append(chunk) + in_chunks = np.concatenate(chunks, axis=1) + + np.testing.assert_allclose(all_at_once, in_chunks)