Skip to content

[WIP] Add AVFoundation Loader and MonoLoader. #1153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/algorithms/io/avfoundationaudiofile.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef AVFoundationCPPGlue_hpp
#define AVFoundationCPPGlue_hpp

#include <stdio.h>
#include <string>
#include <stdexcept>

// C++ facade over an AVFoundation audio file plus the PCM buffer it decodes
// into. The Objective-C objects are kept behind opaque pointers so that this
// header can be included from plain C++ code.
class AVFoundationAudioFile {
 private:
  void *_file;    // retained audio file object, released by the destructor
  void *_buffer;  // retained PCM buffer that readNext() fills

  // Copying is disabled (declared, never defined): a copy would share _file
  // and _buffer with the original and both destructors would release them.
  AVFoundationAudioFile(const AVFoundationAudioFile&);
  AVFoundationAudioFile& operator=(const AVFoundationAudioFile&);

 public:
  // Note: Unfortunately, Apple's API demands reading buffers into AVAudioPCMBuffer.
  // Reading into custom memory is not allowed. So this class maintains a buffer - if
  // reading into outside buffers is desired, read using this class, and then memcpy.

  // Opens `filename` for reading and allocates the internal buffer.
  // `bufferLength` sizes that buffer. Throws std::runtime_error on failure.
  AVFoundationAudioFile(const std::string& filename, const int bufferLength);
  ~AVFoundationAudioFile();

  // Reads the next chunk of audio into `buffers`.
  // Returns new frameLength
  int readNext();

  // Position, in frames, at which the next readNext() starts.
  const uint64_t getFramePosition();
  void setFramePosition(const uint64_t framePosition);

  // Per-channel sample pointers into the internal buffer; consecutive samples
  // of one channel are `stride` floats apart.
  float **buffers;
  // Current number of valid frames in the buffer
  int frameLength;

  // Length of the file as reported when it was opened.
  uint64_t length;

  // Properties of the format the file is decoded to, captured by the constructor.
  int channels;
  float sample_rate;
  std::string codec;
  int bit_rate;         // channels * sample_rate * bit depth
  bool is_interleaved;
  int stride;
};

#endif /* AVFoundationCPPGlue_hpp */
75 changes: 75 additions & 0 deletions src/algorithms/io/avfoundationaudiofile.mm
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#import "avfoundationaudiofile.hpp"

#import <Foundation/Foundation.h>
#import <AVFoundation/AVFoundation.h>

// Balances the CFBridgingRetain calls made by the constructor: the file handle
// is released first, then the buffer handle, and each pointer is cleared.
AVFoundationAudioFile::~AVFoundationAudioFile() {
  void **handles[] = { &_file, &_buffer };

  for (size_t i = 0; i < sizeof(handles) / sizeof(handles[0]); ++i) {
    void *&handle = *handles[i];
    if (handle == NULL) {
      continue;
    }
    CFBridgingRelease(handle);
    handle = NULL;
  }
}

// Opens `filename` for reading, caches its format metadata in the public
// members and allocates the PCM buffer that readNext() fills.
//
// filename      path of the audio file, UTF-8 encoded
// bufferLength  size used to derive the frame capacity of the internal buffer
//
// Throws std::runtime_error when the path is not valid UTF-8, the file cannot
// be opened, or no usable float buffer can be created.
AVFoundationAudioFile::AVFoundationAudioFile(const std::string& filename, const int bufferLength): _file(NULL), _buffer(NULL) {
  // Everything stays in ARC-managed locals until nothing can throw any more:
  // the destructor does not run for a constructor that throws, so retaining
  // early would leak.

  // Read file

  NSString *path = [NSString stringWithUTF8String: filename.c_str()];
  if (!path) {
    // stringWithUTF8String: yields nil for malformed input, and
    // fileURLWithPath: must not be handed nil.
    throw std::runtime_error("AVFoundationAudioFile: filename is not valid UTF-8");
  }

  NSError *error = nil;
  NSURL *url = [NSURL fileURLWithPath: path];
  AVAudioFile *file = [[AVAudioFile alloc] initForReading: url error: &error];

  // Cocoa convention: failure is signalled by the return value, not by the
  // error pointer.
  if (!file) {
    const char *reason = [[error localizedDescription] UTF8String];
    throw std::runtime_error(reason ? reason : "AVFoundationAudioFile: could not open file");
  }

  AVAudioFormat *format = [file processingFormat];
  const int bitDepth = [[[format settings] objectForKey: AVLinearPCMBitDepthKey] intValue];

  // Create buffer

  // bitDepth is 0 when the settings dictionary has no bit-depth entry; fall
  // back to the width of the float samples exposed through `buffers` rather
  // than dividing by zero.
  const int divisor = bitDepth > 0 ? bitDepth : (int) (8 * sizeof(float));
  const int frameCapacity = bufferLength / divisor;
  if (frameCapacity < 1) {
    throw std::runtime_error("AVFoundationAudioFile: bufferLength is too small to hold a single frame");
  }

  AVAudioPCMBuffer *pcmBuffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:format frameCapacity: frameCapacity];
  if (!pcmBuffer) {
    throw std::runtime_error("AVFoundationAudioFile: could not allocate the PCM buffer");
  }

  float **channelData = (float **)[pcmBuffer floatChannelData];
  if (!channelData) {
    throw std::runtime_error("AVFoundationAudioFile: PCM buffer does not hold 32-bit float samples");
  }

  // Read metadata for convenient access

  length = [file length];
  channels = [format channelCount];
  sample_rate = [format sampleRate];
  // NOTE(review): this stores the class name of the format object, which is
  // presumably not the codec of the file on disk - confirm what callers expect.
  codec = [[format className] UTF8String];
  bit_rate = (int) (channels * sample_rate * bitDepth);
  is_interleaved = [format isInterleaved];

  buffers = channelData;
  pcmBuffer.frameLength = 0;
  frameLength = 0;
  stride = [pcmBuffer stride];

  // Nothing below can throw: take ownership of both objects. The destructor
  // gives these retains back.
  _file = (void *) CFBridgingRetain(file);
  _buffer = (void *) CFBridgingRetain(pcmBuffer);
}

// Reads the next chunk of the file into the internal PCM buffer.
//
// Returns the number of valid frames now available through `buffers` (also
// stored in `frameLength`).
// Throws std::runtime_error carrying the NSError description when the read
// fails.
int AVFoundationAudioFile::readNext() {
  AVAudioFile *file = (__bridge AVAudioFile *) _file;
  AVAudioPCMBuffer *pcmBuffer = (__bridge AVAudioPCMBuffer *) _buffer;
  NSError *error = nil;

  // The BOOL result is the failure signal; `error` only describes it.
  const BOOL ok = [file readIntoBuffer:pcmBuffer error: &error];

  if (!ok && error) {
    // UTF8String yields the C string std::runtime_error needs; guard against
    // an error object that has no description.
    const char *reason = [[error localizedDescription] UTF8String];
    throw std::runtime_error(reason ? reason : "AVFoundationAudioFile: could not read from file");
  }

  // A failed read that supplied no error is reported as "no frames" instead
  // of re-exposing whatever the buffer held from the previous call.
  frameLength = ok ? (int) pcmBuffer.frameLength : 0;
  return frameLength;
}

// Moves the read cursor of the underlying file to `framePosition`.
void AVFoundationAudioFile::setFramePosition(const uint64_t framePosition) {
  // __bridge: borrow the retained handle without changing its ownership.
  [(__bridge AVAudioFile *) _file setFramePosition: framePosition];
}

// Returns the current read cursor of the underlying file.
const uint64_t AVFoundationAudioFile::getFramePosition() {
  // __bridge: borrow the retained handle without changing its ownership.
  return [(__bridge AVAudioFile *) _file framePosition];
}
136 changes: 136 additions & 0 deletions src/algorithms/io/avfoundationloader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#import "avfoundationloader.hpp"
#import "algorithmfactory.h"
#import <iomanip> // setw()

using namespace std;

namespace essentia {
namespace streaming {

// Definitions of the static descriptive strings declared on AVAudioLoader:
// the algorithm's name, its category, and its description (wrapped in DOC()).
// NOTE(review): presumably consumed when the algorithm is registered with the
// factory - confirm against algorithmfactory.h.
const char* AVAudioLoader::name = "AVAudioLoader";
const char* AVAudioLoader::category = "Input/output";
const char* AVAudioLoader::description = DOC("An AudioLoader using AVFoundation.");


// The loader owns its audio file object; make sure it goes away with the loader.
AVAudioLoader::~AVAudioLoader() {
  this->closeAudioFile();
}

// Caches the "computeMD5" and "audioStream" parameters, then (re)opens the
// configured file through reset().
void AVAudioLoader::configure() {
  const bool wantsMD5 = parameter("computeMD5").toBool();
  const int streamIndex = parameter("audioStream").toInt();

  _computeMD5 = wantsMD5;
  _selectedStream = streamIndex;

  reset();
}


void AVAudioLoader::openAudioFile(const string& filename) {
E_DEBUG(EAlgorithm, "AVAudioLoader: opening file: " << filename);

_file = new AVFoundationAudioFile(filename, BUFFER_SIZE);
}


// Destroys the currently open file object, if any, and clears the pointer so
// the call can safely be repeated.
void AVAudioLoader::closeAudioFile() {
  // Deleting a null pointer is a no-op, so no explicit check is required.
  delete _file;
  _file = NULL;
}


// Validates the channel count and sampling rate of the opened file, remembers
// the channel count for copyOutput(), and publishes both values on the
// "numberChannels" and "sampleRate" outputs.
//
// Throws EssentiaException for more than 2 channels or a non-positive rate.
void AVAudioLoader::pushChannelsSampleRateInfo(int nChannels, Real sampleRate) {
  const bool tooManyChannels = (nChannels > 2);
  if (tooManyChannels) {
    throw EssentiaException("AVAudioLoader: could not load audio. Audio file has more than 2 channels.");
  }

  const bool nonPositiveRate = (sampleRate <= 0);
  if (nonPositiveRate) {
    throw EssentiaException("AVAudioLoader: could not load audio. Audio sampling rate must be greater than 0.");
  }

  _nChannels = nChannels;

  _channels.push(nChannels);
  _sampleRate.push(sampleRate);
}


// Publishes the codec name and the bit rate on the "codec" and "bit_rate"
// outputs, in that order.
void AVAudioLoader::pushCodecInfo(std::string codec, int bit_rate) {
  _codec.push(codec);

  _bit_rate.push(bit_rate);
}


// Reads the next chunk of audio from the file and pushes it to the "audio"
// output.
//
// Returns OK after a chunk has been copied, FINISHED once the file is
// exhausted (the file is then closed and an empty md5 is pushed).
// Throws EssentiaException when no filename is configured or when MD5
// computation was requested, which is not implemented.
AlgorithmStatus AVAudioLoader::process() {
  if (!parameter("filename").isConfigured()) {
    throw EssentiaException("AVAudioLoader: Trying to call process() on an AVAudioLoader algo which hasn't been correctly configured.");
  }
  if (_computeMD5) {
    throw EssentiaException("AVAudioLoader: computeMD5 is not implemented.");
  }

  // The file is closed as soon as the end of the stream has been reported, so
  // a further call must not dereference _file; just report FINISHED again.
  if (!_file) {
    shouldStop(true);
    return FINISHED;
  }

  int framesRead = _file->readNext();
  if (!framesRead) {
    // No frames were read, so there is nothing left to copy.
    shouldStop(true);
    closeAudioFile();

    string md5 = "";
    _md5.push(md5);

    return FINISHED;
  }

  copyOutput();

  return OK;
}

// Copies the frames most recently read by the file object into the "audio"
// output as StereoSample tokens.
//
// For a single-channel file only the left() component of each token is
// written. Samples of one channel are read `stride` floats apart.
// Throws EssentiaException when the output tokens cannot be acquired.
void AVAudioLoader::copyOutput() {
  const int nsamples = _file->frameLength;
  const int stride = _file->stride;

  if (!_audio.acquire(nsamples)) {
    throw EssentiaException("AVAudioLoader: could not acquire output for audio");
  }

  vector<StereoSample>& audio = *((vector<StereoSample>*)_audio.getTokens());

  if (_nChannels == 1) {
    const float *mono = _file->buffers[0];

    for (int i = 0; i < nsamples; ++i, mono += stride) {
      audio[i].left() = *mono;
    }
  }
  else { // _nChannels == 2
    const float *left = _file->buffers[0];
    const float *right = _file->buffers[1];

    for (int i = 0; i < nsamples; ++i, left += stride, right += stride) {
      audio[i].left() = *left;
      audio[i].right() = *right;
    }
  }

  _audio.release(nsamples);
}

void AVAudioLoader::reset() {
Algorithm::reset();

if (!parameter("filename").isConfigured()) return;

string filename = parameter("filename").toString();

closeAudioFile();
openAudioFile(filename);

pushChannelsSampleRateInfo(_file->channels, _file->sample_rate);
pushCodecInfo(_file->codec, _file->bit_rate);
}

} // namespace streaming
} // namespace essentia
87 changes: 87 additions & 0 deletions src/algorithms/io/avfoundationloader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#ifndef AVFoundationLoader_hpp
#define AVFoundationLoader_hpp

#include "streamingalgorithm.h"
#include "network.h"
#include "poolstorage.h"
#include "AVFoundationAudioFile.hpp"

// Size, in bytes, from which AVAudioLoader::BUFFER_SIZE is derived.
#define MAX_AUDIO_FRAME_SIZE 192000

namespace essentia {
namespace streaming {

// Streaming algorithm that loads an audio file through AVFoundationAudioFile
// and outputs it as a stream of stereo samples, together with the sampling
// rate, channel count, md5, bit rate and codec of the file.
class AVAudioLoader : public Algorithm {
 protected:
  Source<StereoSample> _audio;
  AbsoluteSource<Real> _sampleRate;
  AbsoluteSource<int> _channels;
  AbsoluteSource<std::string> _md5;
  AbsoluteSource<int> _bit_rate;
  AbsoluteSource<std::string> _codec;

  // Channel count of the currently opened file, set by pushChannelsSampleRateInfo().
  int _nChannels;

  // MAX_AUDIO_FRAME_SIZE is in bytes, multiply it by 2 to get some margin,
  // because we might want to decode multiple frames in this buffer (all the
  // frames contained in a packet, which can be more than 1 as in flac), and
  // each time we decode a frame we need to have at least a full buffer of free space.
  const static size_t BUFFER_SIZE = MAX_AUDIO_FRAME_SIZE * 2;

  // Currently opened file; NULL when no file is open. Owned by this object.
  AVFoundationAudioFile *_file;
  uint8_t _checksum[16];
  bool _computeMD5;

  struct AVAudioResampleContext* _convertCtxAv;

  int _streamIdx; // index of the audio stream among all the streams contained in the file
  std::vector<int> _streams;
  int _selectedStream;
  bool _configured;


  void openAudioFile(const std::string& filename);
  void closeAudioFile();

  void pushChannelsSampleRateInfo(int nChannels, Real sampleRate);
  void pushCodecInfo(std::string codec, int bit_rate);

  void copyOutput();

 public:
  // Members are initialized in declaration order, and every scalar/pointer
  // member gets a defined value so nothing is read uninitialized.
  AVAudioLoader() : Algorithm(),
                    _nChannels(0),
                    _file(NULL),
                    _checksum(),
                    _computeMD5(false),
                    _convertCtxAv(NULL),
                    _streamIdx(0),
                    _selectedStream(0),
                    _configured(false) {

    declareOutput(_audio, 1, "audio", "the input audio signal");
    declareOutput(_sampleRate, 0, "sampleRate", "the sampling rate of the audio signal [Hz]");
    declareOutput(_channels, 0, "numberChannels", "the number of channels");
    declareOutput(_md5, 0, "md5", "the MD5 checksum of raw undecoded audio payload");
    declareOutput(_bit_rate, 0, "bit_rate", "the bit rate of the input audio, as reported by the decoder codec");
    declareOutput(_codec, 0, "codec", "the codec that is used to decode the input audio");

    _audio.setBufferType(BufferUsage::forLargeAudioStream);
  }

  ~AVAudioLoader();

  // Reads and outputs the next chunk of audio.
  AlgorithmStatus process();
  // Resets the algorithm and reopens the configured file, if any.
  void reset();

  void declareParameters() {
    declareParameter("filename", "the name of the file from which to read", "", Parameter::STRING);
    declareParameter("computeMD5", "compute the MD5 checksum", "{true,false}", false);
    declareParameter("audioStream", "audio stream index to be loaded. Other streams are not taken into account (e.g. if stream 0 is video and 1 is audio use index 0 to access it.)", "[0,inf)", 0);
  }

  // Caches the parameter values and calls reset().
  void configure();

  static const char* name;
  static const char* category;
  static const char* description;

};

} // namespace streaming
} // namespace essentia


#endif /* AVFoundationLoader_hpp */
Loading