MTG · Ivorforce · May 23, 2021 · Jun 6, 2021 · Jun 6, 2021 · Jun 6, 2021
diff --git a/src/algorithms/io/avfoundationaudiofile.hpp b/src/algorithms/io/avfoundationaudiofile.hpp
@@ -0,0 +1,41 @@
+#ifndef AVFoundationCPPGlue_hpp
+#define AVFoundationCPPGlue_hpp
+
+#include <stdio.h>
+#include <string>
+#include <stdexcept>
+
+// C++-only facade over an AVFoundation audio file plus the PCM buffer it is
+// read into. The Objective-C objects are kept behind void* handles, so this
+// header can be included from plain C++ translation units.
+class AVFoundationAudioFile {
+private:
+    // Retained, opaque handles to the underlying AVAudioFile and
+    // AVAudioPCMBuffer; both are released by the destructor.
+    void *_file;
+    void *_buffer;
+
+    // Not copyable: a copy would share the two retained handles and the
+    // destructor would then release each of them twice. Declared here but
+    // intentionally never defined.
+    AVFoundationAudioFile(const AVFoundationAudioFile&);
+    AVFoundationAudioFile& operator=(const AVFoundationAudioFile&);
+
+public:
+    // Note: Unfortunately, Apple's API demands reading buffers into AVAudioPCMBuffer.
+    // Reading into custom memory is not allowed. So this class maintains a buffer - if
+    // reading into outside buffers is desired, read using this class, and then memcpy.
+
+    // Opens `filename` for reading and allocates the internal buffer, whose
+    // capacity is derived from `bufferLength`.
+    // Throws std::runtime_error if the file cannot be opened.
+    AVFoundationAudioFile(const std::string& filename, const int bufferLength);
+    ~AVFoundationAudioFile();
+
+    // Reads the next chunk of the file into the internal buffer.
+    // Returns new frameLength
+    int readNext();
+
+    // Get / set the position (in frames) at which the next read starts.
+    const uint64_t getFramePosition();
+    void setFramePosition(const uint64_t framePosition);
+
+    // Per-channel sample pointers into the internal buffer; consecutive
+    // samples of one channel are `stride` floats apart.
+    float **buffers;
+    // Current number of valid frames in the buffer
+    int frameLength;
+
+    // Total length of the file as reported by AVAudioFile.
+    uint64_t length;
+
+    // Properties of the file's processing format, cached at construction.
+    int channels;
+    float sample_rate;
+    std::string codec;      // class name of the format object
+    int bit_rate;           // channels * sample_rate * bit depth
+    bool is_interleaved;
+    int stride;
+};
+
+#endif /* AVFoundationCPPGlue_hpp */
diff --git a/src/algorithms/io/avfoundationaudiofile.mm b/src/algorithms/io/avfoundationaudiofile.mm
@@ -0,0 +1,75 @@
+#import "avfoundationaudiofile.hpp"
+
+#import <Foundation/Foundation.h>
+#import <AVFoundation/AVFoundation.h>
+
+// Gives up the ownership taken with CFBridgingRetain in the constructor,
+// file handle first, then the PCM buffer, and clears both handles.
+AVFoundationAudioFile::~AVFoundationAudioFile() {
+  if (_file != NULL) {
+    CFBridgingRelease(_file);
+  }
+  _file = NULL;
+
+  if (_buffer != NULL) {
+    CFBridgingRelease(_buffer);
+  }
+  _buffer = NULL;
+}
+
+// Opens `filename` for reading, caches the processing format's metadata in the
+// public fields and allocates the PCM buffer that readNext() fills.
+//
+// filename:     path of the audio file, UTF-8 encoded.
+// bufferLength: size hint for the buffer; the frame capacity is
+//               bufferLength / (bit depth of the processing format).
+//
+// Throws std::runtime_error if the path is not valid UTF-8, the file cannot be
+// opened, the format reports no usable bit depth, the resulting capacity is not
+// positive, or the buffer cannot be allocated. The Objective-C objects are only
+// retained into _file/_buffer once nothing can throw any more, because the
+// destructor does not run for a constructor that throws.
+AVFoundationAudioFile::AVFoundationAudioFile(const std::string& filename, const int bufferLength): _file(NULL), _buffer(NULL) {
+  // Read file
+
+  NSString *path = [NSString stringWithUTF8String: filename.c_str()];
+  if (path == nil) {
+    // stringWithUTF8String: returns nil for bytes that are not valid UTF-8,
+    // and fileURLWithPath: must not be handed nil.
+    throw std::runtime_error("AVFoundationAudioFile: filename is not valid UTF-8");
+  }
+
+  NSError *error = nil;
+  NSURL *url = [NSURL fileURLWithPath: path];
+  AVAudioFile *file = [[AVAudioFile alloc] initForReading: url error: &error];
+
+  // Cocoa convention: failure is signalled by the returned object, not by the
+  // error pointer.
+  if (file == nil) {
+    const char *reason = [[error localizedDescription] UTF8String];
+    throw std::runtime_error(reason ? reason : "AVFoundationAudioFile: could not open audio file");
+  }
+
+  // Read metadata for convenient access
+
+  AVAudioFormat *format = [file processingFormat];
+  int bitDepth = [[[format settings] objectForKey: AVLinearPCMBitDepthKey] intValue];
+  if (bitDepth <= 0) {
+    // A missing key yields 0, which would otherwise be used as a divisor below.
+    throw std::runtime_error("AVFoundationAudioFile: processing format reports no PCM bit depth");
+  }
+
+  // Create buffer
+
+  const int frameCapacity = bufferLength / bitDepth;
+  if (frameCapacity <= 0) {
+    throw std::runtime_error("AVFoundationAudioFile: bufferLength is too small to hold a single frame");
+  }
+
+  AVAudioPCMBuffer *pcmBuffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:format frameCapacity: (AVAudioFrameCount) frameCapacity];
+  if (pcmBuffer == nil) {
+    throw std::runtime_error("AVFoundationAudioFile: could not allocate PCM buffer");
+  }
+  pcmBuffer.frameLength = 0;
+
+  // Publish metadata and buffer layout
+
+  length = [file length];
+  channels = [format channelCount];
+  sample_rate = [format sampleRate];
+  const char *formatName = [[format className] UTF8String];
+  codec = formatName ? formatName : "";
+  bit_rate = (int) (channels * sample_rate * bitDepth);
+  is_interleaved = [format isInterleaved];
+
+  buffers = (float **)[pcmBuffer floatChannelData];
+  frameLength = 0;
+  stride = [pcmBuffer stride];
+
+  // Take ownership last; the destructor releases both handles.
+  _file = (void *) CFBridgingRetain(file);
+  _buffer = (void *) CFBridgingRetain(pcmBuffer);
+}
+
+// Reads the next chunk of the file into the internal PCM buffer, updates
+// `frameLength` to the number of frames now held by the buffer and returns it.
+// Throws std::runtime_error carrying the NSError description when the read
+// fails.
+int AVFoundationAudioFile::readNext() {
+  AVAudioFile *file = (__bridge AVAudioFile *) _file;
+  AVAudioPCMBuffer *pcmBuffer = (__bridge AVAudioPCMBuffer *) _buffer;
+  NSError *error = nil;
+
+  BOOL ok = [file readIntoBuffer:pcmBuffer error: &error];
+
+  // Only trust `error` when the call itself reports failure.
+  if (!ok && error) {
+    // NSString has no `STDstring` selector; UTF8String yields the C string
+    // std::runtime_error needs (same conversion as in the constructor).
+    const char *reason = [[error localizedDescription] UTF8String];
+    throw std::runtime_error(reason ? reason : "AVFoundationAudioFile: could not read from audio file");
+  }
+
+  frameLength = pcmBuffer.frameLength;
+  return frameLength;
+}
+
+// Moves the read position of the underlying AVAudioFile to `framePosition`.
+void AVFoundationAudioFile::setFramePosition(const uint64_t framePosition) {
+  [(__bridge AVAudioFile *) _file setFramePosition: framePosition];
+}
+
+// Returns the current read position of the underlying AVAudioFile.
+const uint64_t AVFoundationAudioFile::getFramePosition() {
+  return [(__bridge AVAudioFile *) _file framePosition];
+}
diff --git a/src/algorithms/io/avfoundationloader.cpp b/src/algorithms/io/avfoundationloader.cpp
@@ -0,0 +1,136 @@
+#import "avfoundationloader.hpp"
+#import "algorithmfactory.h"
+#import <iomanip>  //  setw()
+
+using namespace std;
+
+namespace essentia {
+namespace streaming {
+
+const char* AVAudioLoader::name = "AVAudioLoader";  // definition of the static `name` member
+const char* AVAudioLoader::category = "Input/output";  // definition of the static `category` member
+const char* AVAudioLoader::description = DOC("An AudioLoader using AVFoundation.");  // description text, wrapped by the DOC() macro
+
+
+// Releases the audio file, if one is still open.
+AVAudioLoader::~AVAudioLoader() {
+  this->closeAudioFile();
+}
+
+// Caches the "computeMD5" and "audioStream" parameters, then calls reset().
+void AVAudioLoader::configure() {
+  const bool wantsMD5 = parameter("computeMD5").toBool();
+  _computeMD5 = wantsMD5;
+
+  const int streamIndex = parameter("audioStream").toInt();
+  _selectedStream = streamIndex;
+
+  this->reset();
+}
+
+
+// Creates the AVFoundationAudioFile for `filename`, with BUFFER_SIZE as its
+// buffer length, and stores it in _file.
+void AVAudioLoader::openAudioFile(const string& filename) {
+  E_DEBUG(EAlgorithm, "AVAudioLoader: opening file: " << filename);
+
+  AVFoundationAudioFile *opened = new AVFoundationAudioFile(filename, BUFFER_SIZE);
+  _file = opened;
+}
+
+
+// Destroys the currently open file object (if any) and clears the pointer.
+void AVAudioLoader::closeAudioFile() {
+  // `delete` on a null pointer is a no-op, so no explicit check is needed.
+  delete _file;
+  _file = NULL;
+}
+
+
+// Validates the channel count and sampling rate of the opened file, remembers
+// the channel count in _nChannels and pushes both values to the
+// "numberChannels" and "sampleRate" outputs.
+// Throws EssentiaException unless nChannels is 1 or 2 and sampleRate > 0.
+void AVAudioLoader::pushChannelsSampleRateInfo(int nChannels, Real sampleRate) {
+  if (nChannels < 1) {
+    // copyOutput() treats every count other than 1 as stereo and would read a
+    // second channel that does not exist.
+    throw EssentiaException("AVAudioLoader: could not load audio. Audio file has no audio channels.");
+  }
+  if (nChannels > 2) {
+    throw EssentiaException("AVAudioLoader: could not load audio. Audio file has more than 2 channels.");
+  }
+  if (sampleRate <= 0) {
+    throw EssentiaException("AVAudioLoader: could not load audio. Audio sampling rate must be greater than 0.");
+  }
+
+  _nChannels = nChannels;
+
+  _channels.push(nChannels);
+  _sampleRate.push(sampleRate);
+}
+
+
+// Pushes the codec name and the bit rate to their respective outputs.
+void AVAudioLoader::pushCodecInfo(std::string codec, int bit_rate) {
+  this->_codec.push(codec);
+  this->_bit_rate.push(bit_rate);
+}
+
+
+// Reads the next chunk of audio and copies it to the "audio" output.
+//
+// Returns OK after a chunk has been produced. When the file yields no more
+// frames it requests a stop, closes the file, pushes an empty MD5 string and
+// returns FINISHED.
+//
+// Throws EssentiaException if no filename is configured, if computeMD5 was
+// requested (not implemented), or if no audio file is currently open.
+AlgorithmStatus AVAudioLoader::process() {
+  if (!parameter("filename").isConfigured()) {
+    throw EssentiaException("AVAudioLoader: Trying to call process() on an AVAudioLoader algo which hasn't been correctly configured.");
+  }
+  if (_computeMD5) {
+    throw EssentiaException("AVAudioLoader: computeMD5 is not implemented.");
+  }
+  if (!_file) {
+    // _file is NULL once the stream has finished (closed below) or when
+    // opening failed; dereferencing it would crash.
+    throw EssentiaException("AVAudioLoader: Trying to call process() without an open audio file; call reset() first.");
+  }
+
+  const int framesRead = _file->readNext();
+  if (framesRead == 0) {
+    // End of stream
+    shouldStop(true);
+    closeAudioFile();
+
+    // MD5 computation is not implemented; an empty string is pushed instead.
+    string md5 = "";
+    _md5.push(md5);
+
+    return FINISHED;
+  }
+
+  copyOutput();
+
+  return OK;
+}
+
+// Copies the frames currently held by the file's buffer into the "audio"
+// output as StereoSample tokens.
+//
+// With one channel only left() of each token is written; right() is not
+// touched here. With two channels both sides are written. Samples of a
+// channel are read `stride` floats apart.
+//
+// Throws EssentiaException if the file exposes no sample pointers or the
+// output tokens cannot be acquired.
+void AVAudioLoader::copyOutput() {
+  const int nsamples = _file->frameLength;
+  const int stride = _file->stride;
+
+  // Checked before acquiring so a failure does not leave tokens acquired.
+  if (!_file->buffers) {
+    throw EssentiaException("AVAudioLoader: no sample buffers available to copy from");
+  }
+
+  bool ok = _audio.acquire(nsamples);
+  if (!ok) {
+    throw EssentiaException("AVAudioLoader: could not acquire output for audio");
+  }
+
+  vector<StereoSample>& audio = *((vector<StereoSample>*)_audio.getTokens());
+
+  const float *left = _file->buffers[0];
+
+  if (_nChannels == 1) {
+    for (int i = 0; i < nsamples; ++i) {
+      audio[i].left() = left[i * stride];
+    }
+  }
+  else { // _nChannels == 2
+    const float *right = _file->buffers[1];
+
+    for (int i = 0; i < nsamples; ++i) {
+      audio[i].left() = left[i * stride];
+      audio[i].right() = right[i * stride];
+    }
+  }
+
+  _audio.release(nsamples);
+}
+
+// Resets the base algorithm and, when a filename is configured, reopens that
+// file and pushes its channel count, sampling rate, codec and bit rate.
+void AVAudioLoader::reset() {
+  Algorithm::reset();
+
+  if (parameter("filename").isConfigured()) {
+    const string path = parameter("filename").toString();
+
+    // Drop any previously opened file before opening the new one.
+    closeAudioFile();
+    openAudioFile(path);
+
+    pushChannelsSampleRateInfo(_file->channels, _file->sample_rate);
+    pushCodecInfo(_file->codec, _file->bit_rate);
+  }
+}
+
+} // namespace streaming
+} // namespace essentia
diff --git a/src/algorithms/io/avfoundationloader.hpp b/src/algorithms/io/avfoundationloader.hpp
@@ -0,0 +1,87 @@
+#ifndef AVFoundationLoader_hpp
+#define AVFoundationLoader_hpp
+
+#include "streamingalgorithm.h"
+#include "network.h"
+#include "poolstorage.h"
+#include "AVFoundationAudioFile.hpp"
+
+#define MAX_AUDIO_FRAME_SIZE 192000
+
+namespace essentia {
+namespace streaming {
+
+// Streaming algorithm that loads an audio file through AVFoundationAudioFile
+// and emits it as a stream of StereoSample tokens, together with one-shot
+// outputs for sampling rate, channel count, MD5, bit rate and codec.
+class AVAudioLoader : public Algorithm {
+ protected:
+  Source<StereoSample> _audio;
+  AbsoluteSource<Real> _sampleRate;
+  AbsoluteSource<int> _channels;
+  AbsoluteSource<std::string> _md5;
+  AbsoluteSource<int> _bit_rate;
+  AbsoluteSource<std::string> _codec;
+
+  // Channel count of the currently opened file; set by pushChannelsSampleRateInfo().
+  int _nChannels;
+
+  // MAX_AUDIO_FRAME_SIZE is in bytes, multiply it by 2 to get some margin,
+  // because we might want to decode multiple frames in this buffer (all the
+  // frames contained in a packet, which can be more than 1 as in flac), and
+  // each time we decode a frame we need to have at least a full buffer of free space.
+  // NOTE(review): in this loader the value is only handed to
+  // AVFoundationAudioFile as its buffer length.
+  const static size_t BUFFER_SIZE = MAX_AUDIO_FRAME_SIZE * 2;
+
+  // Owned; created by openAudioFile() and destroyed by closeAudioFile().
+  AVFoundationAudioFile *_file;
+  uint8_t _checksum[16];
+  bool _computeMD5;
+
+  struct AVAudioResampleContext* _convertCtxAv;
+
+  int _streamIdx; // index of the audio stream among all the streams contained in the file
+  std::vector<int> _streams;
+  int _selectedStream;
+  bool _configured;
+
+
+  void openAudioFile(const std::string& filename);
+  void closeAudioFile();
+
+  void pushChannelsSampleRateInfo(int nChannels, Real sampleRate);
+  void pushCodecInfo(std::string codec, int bit_rate);
+
+  void copyOutput();
+
+ public:
+  // Initializers follow the declaration order above, and every scalar member
+  // gets a defined value so nothing is read uninitialized before configure().
+  AVAudioLoader() : Algorithm(),
+                    _nChannels(0),
+                    _file(NULL),
+                    _computeMD5(false),
+                    _convertCtxAv(NULL),
+                    _streamIdx(0),
+                    _selectedStream(0),
+                    _configured(false) {
+
+    declareOutput(_audio, 1, "audio", "the input audio signal");
+    declareOutput(_sampleRate, 0, "sampleRate", "the sampling rate of the audio signal [Hz]");
+    declareOutput(_channels, 0, "numberChannels", "the number of channels");
+    declareOutput(_md5, 0, "md5", "the MD5 checksum of raw undecoded audio payload");
+    declareOutput(_bit_rate, 0, "bit_rate", "the bit rate of the input audio, as reported by the decoder codec");
+    declareOutput(_codec, 0, "codec", "the codec that is used to decode the input audio");
+
+    _audio.setBufferType(BufferUsage::forLargeAudioStream);
+  }
+
+  ~AVAudioLoader();
+
+  // Produces the next chunk of audio; see the implementation file.
+  AlgorithmStatus process();
+  // Reopens the configured file and pushes its metadata.
+  void reset();
+
+  void declareParameters() {
+    declareParameter("filename", "the name of the file from which to read", "", Parameter::STRING);
+    declareParameter("computeMD5", "compute the MD5 checksum", "{true,false}", false);
+    declareParameter("audioStream", "audio stream index to be loaded. Other streams are not taken into account (e.g. if stream 0 is video and 1 is audio use index 0 to access it.)", "[0,inf)", 0);
+  }
+
+  void configure();
+
+  static const char* name;
+  static const char* category;
+  static const char* description;
+
+};
+
+} // namespace streaming
+} // namespace essentia
+
+
+#endif /* AVFoundationLoader_hpp */