Skip to content

Commit f633424

Browse files
committed
Add dummy audio processor
1 parent c201e59 commit f633424

File tree

8 files changed

+171
-93
lines changed

8 files changed

+171
-93
lines changed

main/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,9 @@ file(GLOB BOARD_SOURCES
157157
list(APPEND SOURCES ${BOARD_SOURCES})
158158

159159
if(CONFIG_USE_AUDIO_PROCESSOR)
160-
list(APPEND SOURCES "audio_processing/audio_processor.cc")
160+
list(APPEND SOURCES "audio_processing/afe_audio_processor.cc")
161+
else()
162+
list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc")
161163
endif()
162164
if(CONFIG_USE_WAKE_WORD_DETECT)
163165
list(APPEND SOURCES "audio_processing/wake_word_detect.cc")

main/application.cc

Lines changed: 23 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@
1010
#include "iot/thing_manager.h"
1111
#include "assets/lang_config.h"
1212

13+
#if CONFIG_USE_AUDIO_PROCESSOR
14+
#include "afe_audio_processor.h"
15+
#else
16+
#include "dummy_audio_processor.h"
17+
#endif
18+
1319
#include <cstring>
1420
#include <esp_log.h>
1521
#include <cJSON.h>
@@ -37,6 +43,12 @@ Application::Application() {
3743
event_group_ = xEventGroupCreate();
3844
background_task_ = new BackgroundTask(4096 * 8);
3945

46+
#if CONFIG_USE_AUDIO_PROCESSOR
47+
audio_processor_ = std::make_unique<AfeAudioProcessor>();
48+
#else
49+
audio_processor_ = std::make_unique<DummyAudioProcessor>();
50+
#endif
51+
4052
esp_timer_create_args_t clock_timer_args = {
4153
.callback = [](void* arg) {
4254
Application* app = (Application*)arg;
@@ -502,9 +514,8 @@ void Application::Start() {
502514
});
503515
bool protocol_started = protocol_->Start();
504516

505-
#if CONFIG_USE_AUDIO_PROCESSOR
506-
audio_processor_.Initialize(codec, realtime_chat_enabled_);
507-
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
517+
audio_processor_->Initialize(codec, realtime_chat_enabled_);
518+
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
508519
background_task_->Schedule([this, data = std::move(data)]() mutable {
509520
if (protocol_->IsAudioChannelBusy()) {
510521
return;
@@ -520,7 +531,7 @@ void Application::Start() {
520531
});
521532
});
522533
});
523-
audio_processor_.OnVadStateChange([this](bool speaking) {
534+
audio_processor_->OnVadStateChange([this](bool speaking) {
524535
if (device_state_ == kDeviceStateListening) {
525536
Schedule([this, speaking]() {
526537
if (speaking) {
@@ -533,7 +544,6 @@ void Application::Start() {
533544
});
534545
}
535546
});
536-
#endif
537547

538548
#if CONFIG_USE_WAKE_WORD_DETECT
539549
wake_word_detect_.Initialize(codec);
@@ -716,37 +726,16 @@ void Application::OnAudioInput() {
716726
}
717727
}
718728
#endif
719-
#if CONFIG_USE_AUDIO_PROCESSOR
720-
if (audio_processor_.IsRunning()) {
729+
if (audio_processor_->IsRunning()) {
721730
std::vector<int16_t> data;
722-
int samples = audio_processor_.GetFeedSize();
731+
int samples = audio_processor_->GetFeedSize();
723732
if (samples > 0) {
724733
ReadAudio(data, 16000, samples);
725-
audio_processor_.Feed(data);
734+
audio_processor_->Feed(data);
726735
return;
727736
}
728737
}
729-
#else
730-
if (device_state_ == kDeviceStateListening) {
731-
std::vector<int16_t> data;
732-
ReadAudio(data, 16000, 30 * 16000 / 1000);
733-
background_task_->Schedule([this, data = std::move(data)]() mutable {
734-
if (protocol_->IsAudioChannelBusy()) {
735-
return;
736-
}
737-
opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
738-
AudioStreamPacket packet;
739-
packet.payload = std::move(opus);
740-
packet.timestamp = last_output_timestamp_;
741-
last_output_timestamp_ = 0;
742-
Schedule([this, packet = std::move(packet)]() {
743-
protocol_->SendAudio(packet);
744-
});
745-
});
746-
});
747-
return;
748-
}
749-
#endif
738+
750739
vTaskDelay(pdMS_TO_TICKS(30));
751740
}
752741

@@ -818,9 +807,7 @@ void Application::SetDeviceState(DeviceState state) {
818807
case kDeviceStateIdle:
819808
display->SetStatus(Lang::Strings::STANDBY);
820809
display->SetEmotion("neutral");
821-
#if CONFIG_USE_AUDIO_PROCESSOR
822-
audio_processor_.Stop();
823-
#endif
810+
audio_processor_->Stop();
824811
#if CONFIG_USE_WAKE_WORD_DETECT
825812
wake_word_detect_.StartDetection();
826813
#endif
@@ -838,11 +825,7 @@ void Application::SetDeviceState(DeviceState state) {
838825
UpdateIotStates();
839826

840827
// Make sure the audio processor is running
841-
#if CONFIG_USE_AUDIO_PROCESSOR
842-
if (!audio_processor_.IsRunning()) {
843-
#else
844-
if (true) {
845-
#endif
828+
if (!audio_processor_->IsRunning()) {
846829
// Send the start listening command
847830
protocol_->SendStartListening(listening_mode_);
848831
if (listening_mode_ == kListeningModeAutoStop && previous_state == kDeviceStateSpeaking) {
@@ -853,18 +836,14 @@ void Application::SetDeviceState(DeviceState state) {
853836
#if CONFIG_USE_WAKE_WORD_DETECT
854837
wake_word_detect_.StopDetection();
855838
#endif
856-
#if CONFIG_USE_AUDIO_PROCESSOR
857-
audio_processor_.Start();
858-
#endif
839+
audio_processor_->Start();
859840
}
860841
break;
861842
case kDeviceStateSpeaking:
862843
display->SetStatus(Lang::Strings::SPEAKING);
863844

864845
if (listening_mode_ != kListeningModeRealtime) {
865-
#if CONFIG_USE_AUDIO_PROCESSOR
866-
audio_processor_.Stop();
867-
#endif
846+
audio_processor_->Stop();
868847
#if CONFIG_USE_WAKE_WORD_DETECT
869848
wake_word_detect_.StartDetection();
870849
#endif

main/application.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <list>
1212
#include <vector>
1313
#include <condition_variable>
14+
#include <memory>
1415

1516
#include <opus_encoder.h>
1617
#include <opus_decoder.h>
@@ -19,13 +20,11 @@
1920
#include "protocol.h"
2021
#include "ota.h"
2122
#include "background_task.h"
23+
#include "audio_processor.h"
2224

2325
#if CONFIG_USE_WAKE_WORD_DETECT
2426
#include "wake_word_detect.h"
2527
#endif
26-
#if CONFIG_USE_AUDIO_PROCESSOR
27-
#include "audio_processor.h"
28-
#endif
2928

3029
#define SCHEDULE_EVENT (1 << 0)
3130
#define AUDIO_INPUT_READY_EVENT (1 << 1)
@@ -81,9 +80,7 @@ class Application {
8180
#if CONFIG_USE_WAKE_WORD_DETECT
8281
WakeWordDetect wake_word_detect_;
8382
#endif
84-
#if CONFIG_USE_AUDIO_PROCESSOR
85-
AudioProcessor audio_processor_;
86-
#endif
83+
std::unique_ptr<AudioProcessor> audio_processor_;
8784
Ota ota_;
8885
std::mutex mutex_;
8986
std::list<std::function<void()>> main_tasks_;

main/audio_processing/audio_processor.cc renamed to main/audio_processing/afe_audio_processor.cc

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
#include "audio_processor.h"
1+
#include "afe_audio_processor.h"
22
#include <esp_log.h>
33

44
#define PROCESSOR_RUNNING 0x01
55

6-
static const char* TAG = "AudioProcessor";
6+
static const char* TAG = "AfeAudioProcessor";
77

8-
AudioProcessor::AudioProcessor()
8+
AfeAudioProcessor::AfeAudioProcessor()
99
: afe_data_(nullptr) {
1010
event_group_ = xEventGroupCreate();
1111
}
1212

13-
void AudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
13+
void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
1414
codec_ = codec;
1515
int ref_num = codec_->input_reference() ? 1 : 0;
1616

@@ -51,57 +51,57 @@ void AudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
5151
afe_data_ = afe_iface_->create_from_config(afe_config);
5252

5353
xTaskCreate([](void* arg) {
54-
auto this_ = (AudioProcessor*)arg;
54+
auto this_ = (AfeAudioProcessor*)arg;
5555
this_->AudioProcessorTask();
5656
vTaskDelete(NULL);
5757
}, "audio_communication", 4096, this, 3, NULL);
5858
}
5959

60-
AudioProcessor::~AudioProcessor() {
60+
AfeAudioProcessor::~AfeAudioProcessor() {
6161
if (afe_data_ != nullptr) {
6262
afe_iface_->destroy(afe_data_);
6363
}
6464
vEventGroupDelete(event_group_);
6565
}
6666

67-
size_t AudioProcessor::GetFeedSize() {
67+
size_t AfeAudioProcessor::GetFeedSize() {
6868
if (afe_data_ == nullptr) {
6969
return 0;
7070
}
7171
return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
7272
}
7373

74-
void AudioProcessor::Feed(const std::vector<int16_t>& data) {
74+
void AfeAudioProcessor::Feed(const std::vector<int16_t>& data) {
7575
if (afe_data_ == nullptr) {
7676
return;
7777
}
7878
afe_iface_->feed(afe_data_, data.data());
7979
}
8080

81-
void AudioProcessor::Start() {
81+
void AfeAudioProcessor::Start() {
8282
xEventGroupSetBits(event_group_, PROCESSOR_RUNNING);
8383
}
8484

85-
void AudioProcessor::Stop() {
85+
void AfeAudioProcessor::Stop() {
8686
xEventGroupClearBits(event_group_, PROCESSOR_RUNNING);
8787
if (afe_data_ != nullptr) {
8888
afe_iface_->reset_buffer(afe_data_);
8989
}
9090
}
9191

92-
bool AudioProcessor::IsRunning() {
92+
bool AfeAudioProcessor::IsRunning() {
9393
return xEventGroupGetBits(event_group_) & PROCESSOR_RUNNING;
9494
}
9595

96-
void AudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
96+
void AfeAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
9797
output_callback_ = callback;
9898
}
9999

100-
void AudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
100+
void AfeAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
101101
vad_state_change_callback_ = callback;
102102
}
103103

104-
void AudioProcessor::AudioProcessorTask() {
104+
void AfeAudioProcessor::AudioProcessorTask() {
105105
auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
106106
auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
107107
ESP_LOGI(TAG, "Audio communication task started, feed size: %d fetch size: %d",
@@ -136,4 +136,4 @@ void AudioProcessor::AudioProcessorTask() {
136136
output_callback_(std::vector<int16_t>(res->data, res->data + res->data_size / sizeof(int16_t)));
137137
}
138138
}
139-
}
139+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#ifndef AFE_AUDIO_PROCESSOR_H
2+
#define AFE_AUDIO_PROCESSOR_H
3+
4+
#include <esp_afe_sr_models.h>
5+
#include <freertos/FreeRTOS.h>
6+
#include <freertos/task.h>
7+
#include <freertos/event_groups.h>
8+
9+
#include <string>
10+
#include <vector>
11+
#include <functional>
12+
13+
#include "audio_processor.h"
14+
#include "audio_codec.h"
15+
16+
class AfeAudioProcessor : public AudioProcessor {
17+
public:
18+
AfeAudioProcessor();
19+
~AfeAudioProcessor();
20+
21+
void Initialize(AudioCodec* codec, bool realtime_chat) override;
22+
void Feed(const std::vector<int16_t>& data) override;
23+
void Start() override;
24+
void Stop() override;
25+
bool IsRunning() override;
26+
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) override;
27+
void OnVadStateChange(std::function<void(bool speaking)> callback) override;
28+
size_t GetFeedSize() override;
29+
30+
private:
31+
EventGroupHandle_t event_group_ = nullptr;
32+
esp_afe_sr_iface_t* afe_iface_ = nullptr;
33+
esp_afe_sr_data_t* afe_data_ = nullptr;
34+
std::function<void(std::vector<int16_t>&& data)> output_callback_;
35+
std::function<void(bool speaking)> vad_state_change_callback_;
36+
AudioCodec* codec_ = nullptr;
37+
bool is_speaking_ = false;
38+
39+
void AudioProcessorTask();
40+
};
41+
42+
#endif
Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
#ifndef AUDIO_PROCESSOR_H
22
#define AUDIO_PROCESSOR_H
33

4-
#include <esp_afe_sr_models.h>
5-
#include <freertos/FreeRTOS.h>
6-
#include <freertos/task.h>
7-
#include <freertos/event_groups.h>
8-
94
#include <string>
105
#include <vector>
116
#include <functional>
@@ -14,28 +9,16 @@
149

1510
class AudioProcessor {
1611
public:
17-
AudioProcessor();
18-
~AudioProcessor();
19-
20-
void Initialize(AudioCodec* codec, bool realtime_chat);
21-
void Feed(const std::vector<int16_t>& data);
22-
void Start();
23-
void Stop();
24-
bool IsRunning();
25-
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback);
26-
void OnVadStateChange(std::function<void(bool speaking)> callback);
27-
size_t GetFeedSize();
28-
29-
private:
30-
EventGroupHandle_t event_group_ = nullptr;
31-
esp_afe_sr_iface_t* afe_iface_ = nullptr;
32-
esp_afe_sr_data_t* afe_data_ = nullptr;
33-
std::function<void(std::vector<int16_t>&& data)> output_callback_;
34-
std::function<void(bool speaking)> vad_state_change_callback_;
35-
AudioCodec* codec_ = nullptr;
36-
bool is_speaking_ = false;
37-
38-
void AudioProcessorTask();
12+
virtual ~AudioProcessor() = default;
13+
14+
virtual void Initialize(AudioCodec* codec, bool realtime_chat) = 0;
15+
virtual void Feed(const std::vector<int16_t>& data) = 0;
16+
virtual void Start() = 0;
17+
virtual void Stop() = 0;
18+
virtual bool IsRunning() = 0;
19+
virtual void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) = 0;
20+
virtual void OnVadStateChange(std::function<void(bool speaking)> callback) = 0;
21+
virtual size_t GetFeedSize() = 0;
3922
};
4023

4124
#endif

0 commit comments

Comments
 (0)