Mirror of https://github.com/78/xiaozhi-esp32.git, synced 2025-05-17 07:08:03 +08:00

Commit: reconstruct application
@@ -4,7 +4,7 @@
# CMakeLists in this exact order for cmake to work correctly
cmake_minimum_required(VERSION 3.16)

set(PROJECT_VER "0.3.0")
set(PROJECT_VER "0.3.1")

include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(xiaozhi)
@@ -1,27 +1,28 @@
#include "Application.h"
#include "BuiltinLed.h"
#include "TlsTransport.h"
#include "Ml307SslTransport.h"
#include "WifiConfigurationAp.h"
#include "WifiStation.h"
#include <BuiltinLed.h>
#include <TlsTransport.h>
#include <Ml307SslTransport.h>
#include <WifiConfigurationAp.h>
#include <WifiStation.h>
#include <SystemInfo.h>

#include <cstring>
#include "esp_log.h"
#include "model_path.h"
#include "SystemInfo.h"
#include "cJSON.h"
#include "driver/gpio.h"
#include <esp_log.h>
#include <cJSON.h>
#include <driver/gpio.h>

#include "Application.h"

#define TAG "Application"

Application::Application()
    : button_((gpio_num_t)CONFIG_BOOT_BUTTON_GPIO)
#ifdef CONFIG_USE_ML307
    : ml307_at_modem_(CONFIG_ML307_TX_PIN, CONFIG_ML307_RX_PIN, 4096),
    , ml307_at_modem_(CONFIG_ML307_TX_PIN, CONFIG_ML307_RX_PIN, 4096),
      http_(ml307_at_modem_),
      firmware_upgrade_(http_)
#else
    : http_(),
    , http_(),
      firmware_upgrade_(http_)
#endif
#ifdef CONFIG_USE_DISPLAY
@@ -29,16 +30,6 @@ Application::Application()
#endif
{
    event_group_ = xEventGroupCreate();
    audio_encode_queue_ = xQueueCreate(100, sizeof(iovec));
    audio_decode_queue_ = xQueueCreate(100, sizeof(AudioPacket*));

    srmodel_list_t *models = esp_srmodel_init("model");
    for (int i = 0; i < models->num; i++) {
        ESP_LOGI(TAG, "Model %d: %s", i, models->model_name[i]);
        if (strstr(models->model_name[i], ESP_WN_PREFIX) != NULL) {
            wakenet_model_ = models->model_name[i];
        }
    }

    opus_encoder_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1);
    opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL);
@@ -52,32 +43,12 @@ Application::Application()
}

Application::~Application() {
    if (afe_detection_data_ != nullptr) {
        esp_afe_sr_v1.destroy(afe_detection_data_);
    }

    if (afe_communication_data_ != nullptr) {
        esp_afe_vc_v1.destroy(afe_communication_data_);
    }

    if (wake_word_encode_task_stack_ != nullptr) {
        free(wake_word_encode_task_stack_);
    }
    for (auto& pcm : wake_word_pcm_) {
        free(pcm.iov_base);
    }

    if (opus_decoder_ != nullptr) {
        opus_decoder_destroy(opus_decoder_);
    }
    if (audio_encode_task_stack_ != nullptr) {
        free(audio_encode_task_stack_);
    }
    if (audio_decode_task_stack_ != nullptr) {
        free(audio_decode_task_stack_);
    }
    vQueueDelete(audio_decode_queue_);
    vQueueDelete(audio_encode_queue_);

    vEventGroupDelete(event_group_);
}
@ -204,43 +175,139 @@ void Application::Start() {
|
||||
}
|
||||
#endif
|
||||
|
||||
audio_device_.OnInputData([this](const int16_t* data, int size) {
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
if (audio_processor_.IsRunning()) {
|
||||
audio_processor_.Input(data, size);
|
||||
}
|
||||
if (wake_word_detect_.IsDetectionRunning()) {
|
||||
wake_word_detect_.Feed(data, size);
|
||||
}
|
||||
#else
|
||||
std::vector<int16_t> pcm(data, data + size);
|
||||
Schedule([this, pcm = std::move(pcm)]() {
|
||||
if (chat_state_ == kChatStateListening) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_encode_queue_.emplace_back(std::move(pcm));
|
||||
cv_.notify_all();
|
||||
}
|
||||
});
|
||||
#endif
|
||||
});
|
||||
|
||||
// Initialize the audio device
|
||||
audio_device_.Start(CONFIG_AUDIO_INPUT_SAMPLE_RATE, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
audio_device_.OnStateChanged([this]() {
|
||||
if (audio_device_.playing()) {
|
||||
SetChatState(kChatStateSpeaking);
|
||||
} else {
|
||||
// Check if communication is still running
|
||||
if (xEventGroupGetBits(event_group_) & COMMUNICATION_RUNNING) {
|
||||
SetChatState(kChatStateListening);
|
||||
} else {
|
||||
SetChatState(kChatStateIdle);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// OPUS encoder / decoder use a lot of stack memory
|
||||
const size_t opus_stack_size = 4096 * 8;
|
||||
audio_encode_task_stack_ = (StackType_t*)malloc(opus_stack_size);
|
||||
xTaskCreateStatic([](void* arg) {
|
||||
audio_encode_task_ = xTaskCreateStatic([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioEncodeTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "opus_encode", opus_stack_size, this, 1, audio_encode_task_stack_, &audio_encode_task_buffer_);
|
||||
audio_decode_task_stack_ = (StackType_t*)malloc(opus_stack_size);
|
||||
xTaskCreateStatic([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioDecodeTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "opus_decode", opus_stack_size, this, 1, audio_decode_task_stack_, &audio_decode_task_buffer_);
|
||||
|
||||
StartCommunication();
|
||||
StartDetection();
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioPlayTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "play_audio", 4096 * 2, this, 5, NULL);
|
||||
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
wake_word_detect_.OnVadStateChange([this](bool speaking) {
|
||||
Schedule([this, speaking]() {
|
||||
auto& builtin_led = BuiltinLed::GetInstance();
|
||||
if (chat_state_ == kChatStateListening) {
|
||||
if (speaking) {
|
||||
builtin_led.SetRed(32);
|
||||
} else {
|
||||
builtin_led.SetRed(8);
|
||||
}
|
||||
builtin_led.TurnOn();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
wake_word_detect_.OnWakeWordDetected([this]() {
|
||||
Schedule([this]() {
|
||||
if (chat_state_ == kChatStateIdle) {
|
||||
// Encode the wake word data and start websocket client at the same time
|
||||
// They both consume a lot of time (700ms), so we can do them in parallel
|
||||
wake_word_detect_.EncodeWakeWordData();
|
||||
|
||||
SetChatState(kChatStateConnecting);
|
||||
if (ws_client_ == nullptr) {
|
||||
StartWebSocketClient();
|
||||
}
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
auto encoded = wake_word_detect_.GetWakeWordStream();
|
||||
// Send the wake word data to the server
|
||||
ws_client_->Send(encoded.data(), encoded.size(), true);
|
||||
opus_encoder_.ResetState();
|
||||
// Send a ready message to indicate the server that the wake word data is sent
|
||||
SetChatState(kChatStateWakeWordDetected);
|
||||
// If connected, the hello message is already sent, so we can start communication
|
||||
audio_processor_.Start();
|
||||
ESP_LOGI(TAG, "Audio processor started");
|
||||
} else {
|
||||
SetChatState(kChatStateIdle);
|
||||
}
|
||||
} else if (chat_state_ == kChatStateSpeaking) {
|
||||
break_speaking_ = true;
|
||||
}
|
||||
|
||||
// Resume detection
|
||||
wake_word_detect_.StartDetection();
|
||||
});
|
||||
});
|
||||
wake_word_detect_.StartDetection();
|
||||
|
||||
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
|
||||
Schedule([this, data = std::move(data)]() {
|
||||
if (chat_state_ == kChatStateListening) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_encode_queue_.emplace_back(std::move(data));
|
||||
cv_.notify_all();
|
||||
}
|
||||
});
|
||||
});
|
||||
#endif
|
||||
|
||||
// Blink the LED to indicate the device is running
|
||||
builtin_led.SetGreen();
|
||||
builtin_led.BlinkOnce();
|
||||
xEventGroupSetBits(event_group_, DETECTION_RUNNING);
|
||||
|
||||
button_.OnClick([this]() {
|
||||
Schedule([this]() {
|
||||
if (chat_state_ == kChatStateIdle) {
|
||||
SetChatState(kChatStateConnecting);
|
||||
StartWebSocketClient();
|
||||
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
opus_encoder_.ResetState();
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
audio_processor_.Start();
|
||||
#endif
|
||||
SetChatState(kChatStateListening);
|
||||
ESP_LOGI(TAG, "Communication started");
|
||||
} else {
|
||||
SetChatState(kChatStateIdle);
|
||||
}
|
||||
} else if (chat_state_ == kChatStateSpeaking) {
|
||||
break_speaking_ = true;
|
||||
} else if (chat_state_ == kChatStateListening) {
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
ws_client_->Close();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->MainLoop();
|
||||
vTaskDelete(NULL);
|
||||
}, "main_loop", 4096 * 2, this, 5, NULL);
|
||||
|
||||
// Launch a task to check for new firmware version
|
||||
xTaskCreate([](void* arg) {
|
||||
@ -259,6 +326,28 @@ void Application::Start() {
|
||||
#endif
|
||||
}
|
||||
|
||||
void Application::Schedule(std::function<void()> callback) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
main_tasks_.push_back(callback);
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
// The Main Loop controls the chat state and websocket connection
|
||||
// If other tasks need to access the websocket or chat state,
|
||||
// they should use Schedule to call this function
|
||||
void Application::MainLoop() {
|
||||
while (true) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cv_.wait(lock, [this]() {
|
||||
return !main_tasks_.empty();
|
||||
});
|
||||
auto task = std::move(main_tasks_.front());
|
||||
main_tasks_.pop_front();
|
||||
lock.unlock();
|
||||
task();
|
||||
}
|
||||
}
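
Schedule() and MainLoop() above form a simple single-consumer task queue: producers append a std::function under the mutex and notify, the main loop waits on the condition variable, pops one task, and runs it with the lock released, so chat state and the websocket are only touched from one task. The same pattern reduced to a standalone sketch (not the project's exact types):

#include <condition_variable>
#include <functional>
#include <list>
#include <mutex>

class TaskQueue {
public:
    void Schedule(std::function<void()> task) {
        std::lock_guard<std::mutex> lock(mutex_);
        tasks_.push_back(std::move(task));
        cv_.notify_all();
    }
    void Run() {                        // call from exactly one thread
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_);
            cv_.wait(lock, [this] { return !tasks_.empty(); });
            auto task = std::move(tasks_.front());
            tasks_.pop_front();
            lock.unlock();              // run the task without holding the lock
            task();
        }
    }
private:
    std::mutex mutex_;
    std::condition_variable cv_;
    std::list<std::function<void()>> tasks_;
};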
|
||||
|
||||
void Application::SetChatState(ChatState state) {
|
||||
const char* state_str[] = {
|
||||
"idle",
|
||||
@ -294,199 +383,27 @@ void Application::SetChatState(ChatState state) {
|
||||
builtin_led.SetBlue();
|
||||
builtin_led.TurnOn();
|
||||
break;
|
||||
case kChatStateTesting:
|
||||
builtin_led.SetRed();
|
||||
builtin_led.TurnOn();
|
||||
break;
|
||||
case kChatStateUpgrading:
|
||||
builtin_led.SetGreen();
|
||||
builtin_led.StartContinuousBlink(100);
|
||||
break;
|
||||
}
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
cJSON* root = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(root, "type", "state");
|
||||
cJSON_AddStringToObject(root, "state", state_str[chat_state_]);
|
||||
char* json = cJSON_PrintUnformatted(root);
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
ws_client_->Send(json);
|
||||
cJSON_Delete(root);
|
||||
free(json);
|
||||
}
|
||||
}
|
||||
|
||||
void Application::StartCommunication() {
|
||||
afe_config_t afe_config = {
|
||||
.aec_init = false,
|
||||
.se_init = true,
|
||||
.vad_init = true,
|
||||
.wakenet_init = false,
|
||||
.voice_communication_init = true,
|
||||
.voice_communication_agc_init = true,
|
||||
.voice_communication_agc_gain = 10,
|
||||
.vad_mode = VAD_MODE_3,
|
||||
.wakenet_model_name = NULL,
|
||||
.wakenet_model_name_2 = NULL,
|
||||
.wakenet_mode = DET_MODE_90,
|
||||
.afe_mode = SR_MODE_HIGH_PERF,
|
||||
.afe_perferred_core = 0,
|
||||
.afe_perferred_priority = 5,
|
||||
.afe_ringbuf_size = 50,
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||
.afe_linear_gain = 1.0,
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||
.pcm_config = {
|
||||
.total_ch_num = 1,
|
||||
.mic_num = 1,
|
||||
.ref_num = 0,
|
||||
.sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE,
|
||||
},
|
||||
.debug_init = false,
|
||||
.debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }},
|
||||
.afe_ns_mode = NS_MODE_SSP,
|
||||
.afe_ns_model_name = NULL,
|
||||
.fixed_first_channel = true,
|
||||
};
|
||||
|
||||
afe_communication_data_ = esp_afe_vc_v1.create_from_config(&afe_config);
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioCommunicationTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_communication", 4096 * 2, this, 5, NULL);
|
||||
}
|
||||
|
||||
void Application::StartDetection() {
|
||||
afe_config_t afe_config = {
|
||||
.aec_init = false,
|
||||
.se_init = true,
|
||||
.vad_init = true,
|
||||
.wakenet_init = true,
|
||||
.voice_communication_init = false,
|
||||
.voice_communication_agc_init = false,
|
||||
.voice_communication_agc_gain = 10,
|
||||
.vad_mode = VAD_MODE_3,
|
||||
.wakenet_model_name = wakenet_model_,
|
||||
.wakenet_model_name_2 = NULL,
|
||||
.wakenet_mode = DET_MODE_90,
|
||||
.afe_mode = SR_MODE_HIGH_PERF,
|
||||
.afe_perferred_core = 0,
|
||||
.afe_perferred_priority = 5,
|
||||
.afe_ringbuf_size = 50,
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||
.afe_linear_gain = 1.0,
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||
.pcm_config = {
|
||||
.total_ch_num = 1,
|
||||
.mic_num = 1,
|
||||
.ref_num = 0,
|
||||
.sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE
|
||||
},
|
||||
.debug_init = false,
|
||||
.debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }},
|
||||
.afe_ns_mode = NS_MODE_SSP,
|
||||
.afe_ns_model_name = NULL,
|
||||
.fixed_first_channel = true,
|
||||
};
|
||||
|
||||
afe_detection_data_ = esp_afe_sr_v1.create_from_config(&afe_config);
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioFeedTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_feed", 4096 * 2, this, 5, NULL);
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioDetectionTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_detection", 4096 * 2, this, 5, NULL);
|
||||
}
|
||||
|
||||
void Application::AudioFeedTask() {
|
||||
int chunk_size = esp_afe_vc_v1.get_feed_chunksize(afe_detection_data_);
|
||||
int16_t buffer[chunk_size];
|
||||
ESP_LOGI(TAG, "Audio feed task started, chunk size: %d", chunk_size);
|
||||
|
||||
while (true) {
|
||||
audio_device_.Read(buffer, chunk_size);
|
||||
|
||||
auto event_bits = xEventGroupGetBits(event_group_);
|
||||
if (event_bits & DETECTION_RUNNING) {
|
||||
esp_afe_sr_v1.feed(afe_detection_data_, buffer);
|
||||
} else if (event_bits & COMMUNICATION_RUNNING) {
|
||||
esp_afe_vc_v1.feed(afe_communication_data_, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
void Application::StoreWakeWordData(uint8_t* data, size_t size) {
|
||||
// store audio data to wake_word_pcm_
|
||||
auto iov = (iovec){
|
||||
.iov_base = heap_caps_malloc(size, MALLOC_CAP_SPIRAM),
|
||||
.iov_len = size
|
||||
};
|
||||
memcpy(iov.iov_base, data, size);
|
||||
wake_word_pcm_.push_back(iov);
|
||||
// keep about 2 seconds of data, detect duration is 32ms (sample_rate == 16000, chunksize == 512)
|
||||
while (wake_word_pcm_.size() > 2000 / 32) {
|
||||
heap_caps_free(wake_word_pcm_.front().iov_base);
|
||||
wake_word_pcm_.pop_front();
|
||||
}
|
||||
}
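
The loop above caps the stored audio at roughly two seconds: each chunk covers about 32 ms (512 samples at 16 kHz), so 2000 / 32, about 62 chunks, are kept and the oldest are freed. A standalone sketch of the same bounded buffer using std::deque instead of the iovec list (illustrative types only):

#include <cstdint>
#include <deque>
#include <vector>

// Keep ~2 s of 16 kHz mono PCM arriving in 32 ms (512-sample) chunks.
class RecentPcmBuffer {
public:
    void Push(const int16_t* data, size_t samples) {
        chunks_.emplace_back(data, data + samples);
        while (chunks_.size() > kMaxChunks) {
            chunks_.pop_front();                     // drop the oldest chunk
        }
    }
private:
    static constexpr size_t kMaxChunks = 2000 / 32;  // ≈ 62 chunks ≈ 2 seconds
    std::deque<std::vector<int16_t>> chunks_;
};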
|
||||
|
||||
void Application::EncodeWakeWordData() {
|
||||
if (wake_word_encode_task_stack_ == nullptr) {
|
||||
wake_word_encode_task_stack_ = (StackType_t*)malloc(4096 * 8);
|
||||
}
|
||||
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
|
||||
Application* app = (Application*)arg;
|
||||
auto start_time = esp_timer_get_time();
|
||||
// encode detect packets
|
||||
OpusEncoder* encoder = new OpusEncoder();
|
||||
encoder->Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1, 60);
|
||||
encoder->SetComplexity(0);
|
||||
app->wake_word_opus_.resize(4096 * 4);
|
||||
size_t offset = 0;
|
||||
|
||||
for (auto& pcm: app->wake_word_pcm_) {
|
||||
encoder->Encode(pcm, [app, &offset](const iovec opus) {
|
||||
size_t protocol_size = sizeof(BinaryProtocol) + opus.iov_len;
|
||||
if (offset + protocol_size < app->wake_word_opus_.size()) {
|
||||
auto protocol = (BinaryProtocol*)(&app->wake_word_opus_[offset]);
|
||||
protocol->version = htons(PROTOCOL_VERSION);
|
||||
protocol->type = htons(0);
|
||||
protocol->reserved = 0;
|
||||
protocol->timestamp = htonl(app->audio_device_.playing() ? app->audio_device_.last_timestamp() : 0);
|
||||
protocol->payload_size = htonl(opus.iov_len);
|
||||
memcpy(protocol->payload, opus.iov_base, opus.iov_len);
|
||||
offset += protocol_size;
|
||||
}
|
||||
});
|
||||
heap_caps_free(pcm.iov_base);
|
||||
}
|
||||
app->wake_word_pcm_.clear();
|
||||
app->wake_word_opus_.resize(offset);
|
||||
|
||||
auto end_time = esp_timer_get_time();
|
||||
ESP_LOGI(TAG, "Encode wake word opus: %zu bytes in %lld ms", app->wake_word_opus_.size(), (end_time - start_time) / 1000);
|
||||
xEventGroupSetBits(app->event_group_, WAKE_WORD_ENCODED);
|
||||
delete encoder;
|
||||
vTaskDelete(NULL);
|
||||
}, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
|
||||
}
|
||||
|
||||
void Application::SendWakeWordData() {
|
||||
ws_client_->Send(wake_word_opus_.data(), wake_word_opus_.size(), true);
|
||||
wake_word_opus_.clear();
|
||||
}
|
||||
|
||||
BinaryProtocol* Application::AllocateBinaryProtocol(void* payload, size_t payload_size) {
|
||||
auto last_timestamp = audio_device_.playing() ? audio_device_.last_timestamp() : 0;
|
||||
BinaryProtocol* Application::AllocateBinaryProtocol(const uint8_t* payload, size_t payload_size) {
|
||||
auto last_timestamp = 0;
|
||||
auto protocol = (BinaryProtocol*)heap_caps_malloc(sizeof(BinaryProtocol) + payload_size, MALLOC_CAP_SPIRAM);
|
||||
protocol->version = htons(PROTOCOL_VERSION);
|
||||
protocol->type = htons(0);
|
||||
@ -498,197 +415,34 @@ BinaryProtocol* Application::AllocateBinaryProtocol(void* payload, size_t payloa
|
||||
return protocol;
|
||||
}
|
||||
|
||||
void Application::CheckTestButton() {
|
||||
if (gpio_get_level(GPIO_NUM_1) == 0) {
|
||||
if (chat_state_ == kChatStateIdle) {
|
||||
SetChatState(kChatStateTesting);
|
||||
test_resampler_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
}
|
||||
} else {
|
||||
if (chat_state_ == kChatStateTesting) {
|
||||
SetChatState(kChatStateIdle);
|
||||
|
||||
// Spawn a new task to handle audio playback
|
||||
xTaskCreate([](void* arg) {
|
||||
Application* app = static_cast<Application*>(arg);
|
||||
app->PlayTestAudio();
|
||||
vTaskDelete(NULL);
|
||||
}, "play_test_audio", 4096, this, 1, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Application::PlayTestAudio() {
|
||||
// Write the audio data to the speaker
|
||||
auto packet = new AudioPacket();
|
||||
packet->type = kAudioPacketTypeStart;
|
||||
audio_device_.QueueAudioPacket(packet);
|
||||
|
||||
for (auto& pcm : test_pcm_) {
|
||||
packet = new AudioPacket();
|
||||
packet->type = kAudioPacketTypeData;
|
||||
packet->pcm.resize(test_resampler_.GetOutputSamples(pcm.iov_len / 2));
|
||||
test_resampler_.Process((int16_t*)pcm.iov_base, pcm.iov_len / 2, packet->pcm.data());
|
||||
audio_device_.QueueAudioPacket(packet);
|
||||
heap_caps_free(pcm.iov_base);
|
||||
}
|
||||
// Clear the test PCM data
|
||||
test_pcm_.clear();
|
||||
|
||||
// Stop the audio device
|
||||
packet = new AudioPacket();
|
||||
packet->type = kAudioPacketTypeStop;
|
||||
audio_device_.QueueAudioPacket(packet);
|
||||
}
|
||||
|
||||
void Application::AudioDetectionTask() {
|
||||
auto chunk_size = esp_afe_sr_v1.get_fetch_chunksize(afe_detection_data_);
|
||||
ESP_LOGI(TAG, "Audio detection task started, chunk size: %d", chunk_size);
|
||||
|
||||
while (true) {
|
||||
xEventGroupWaitBits(event_group_, DETECTION_RUNNING, pdFALSE, pdTRUE, portMAX_DELAY);
|
||||
|
||||
auto res = esp_afe_sr_v1.fetch(afe_detection_data_);
|
||||
if (res == nullptr || res->ret_value == ESP_FAIL) {
|
||||
ESP_LOGE(TAG, "Error in AudioDetectionTask");
|
||||
if (res != nullptr) {
|
||||
ESP_LOGI(TAG, "Error code: %d", res->ret_value);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Store the wake word data for voice recognition, like who is speaking
|
||||
StoreWakeWordData((uint8_t*)res->data, res->data_size);
|
||||
|
||||
CheckTestButton();
|
||||
if (chat_state_ == kChatStateTesting) {
|
||||
auto& builtin_led = BuiltinLed::GetInstance();
|
||||
if (res->vad_state == AFE_VAD_SPEECH) {
|
||||
iovec iov = {
|
||||
.iov_base = heap_caps_malloc(res->data_size, MALLOC_CAP_SPIRAM),
|
||||
.iov_len = (size_t)res->data_size
|
||||
};
|
||||
memcpy(iov.iov_base, res->data, res->data_size);
|
||||
test_pcm_.push_back(iov);
|
||||
builtin_led.SetRed(128);
|
||||
} else {
|
||||
builtin_led.SetRed(32);
|
||||
}
|
||||
builtin_led.TurnOn();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (chat_state_ == kChatStateIdle && res->wakeup_state == WAKENET_DETECTED) {
|
||||
xEventGroupClearBits(event_group_, DETECTION_RUNNING);
|
||||
SetChatState(kChatStateConnecting);
|
||||
|
||||
// Encode the wake word data and start websocket client at the same time
|
||||
// They both consume a lot of time (700ms), so we can do them in parallel
|
||||
EncodeWakeWordData();
|
||||
StartWebSocketClient();
|
||||
|
||||
// Here the websocket is done, and we also wait for the wake word data to be encoded
|
||||
xEventGroupWaitBits(event_group_, WAKE_WORD_ENCODED, pdTRUE, pdTRUE, portMAX_DELAY);
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
// Send the wake word data to the server
|
||||
SendWakeWordData();
|
||||
// Send a ready message to indicate the server that the wake word data is sent
|
||||
SetChatState(kChatStateWakeWordDetected);
|
||||
opus_encoder_.ResetState();
|
||||
// If connected, the hello message is already sent, so we can start communication
|
||||
xEventGroupSetBits(event_group_, COMMUNICATION_RUNNING);
|
||||
ESP_LOGI(TAG, "Communication running");
|
||||
} else {
|
||||
SetChatState(kChatStateIdle);
|
||||
xEventGroupSetBits(event_group_, DETECTION_RUNNING);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
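
The idle branch above overlaps two slow steps: EncodeWakeWordData() encodes on its own task and sets WAKE_WORD_ENCODED when done, while StartWebSocketClient() connects on the calling task; xEventGroupWaitBits then joins the two before anything is sent. The shape of that pattern as a sketch, with DoSlowEncode and ConnectWebSocket as placeholders for the two slow steps (not functions from this codebase):

#include <freertos/FreeRTOS.h>
#include <freertos/event_groups.h>
#include <freertos/task.h>

#define JOB_DONE_BIT (1 << 0)

void DoSlowEncode();      // placeholder for the ~700 ms Opus encode
void ConnectWebSocket();  // placeholder for the blocking websocket connect

void RunBothInParallel(EventGroupHandle_t group) {
    xTaskCreate([](void* arg) {
        auto group = (EventGroupHandle_t)arg;
        DoSlowEncode();                              // runs on its own task
        xEventGroupSetBits(group, JOB_DONE_BIT);     // signal completion
        vTaskDelete(NULL);
    }, "bg_encode", 4096, group, 1, NULL);

    ConnectWebSocket();                              // meanwhile, connect on this task
    // rendezvous: continue only when both the encode and the connect are done
    xEventGroupWaitBits(group, JOB_DONE_BIT, pdTRUE, pdTRUE, portMAX_DELAY);
}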
|
||||
|
||||
void Application::AudioCommunicationTask() {
|
||||
int chunk_size = esp_afe_vc_v1.get_fetch_chunksize(afe_communication_data_);
|
||||
ESP_LOGI(TAG, "Audio communication task started, chunk size: %d", chunk_size);
|
||||
|
||||
while (true) {
|
||||
xEventGroupWaitBits(event_group_, COMMUNICATION_RUNNING, pdFALSE, pdTRUE, portMAX_DELAY);
|
||||
|
||||
auto res = esp_afe_vc_v1.fetch(afe_communication_data_);
|
||||
if (res == nullptr || res->ret_value == ESP_FAIL) {
|
||||
ESP_LOGE(TAG, "Error in AudioCommunicationTask");
|
||||
if (res != nullptr) {
|
||||
ESP_LOGI(TAG, "Error code: %d", res->ret_value);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the websocket client is disconnected by the server
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
||||
if (ws_client_ == nullptr || !ws_client_->IsConnected()) {
|
||||
xEventGroupClearBits(event_group_, COMMUNICATION_RUNNING);
|
||||
if (audio_device_.playing()) {
|
||||
audio_device_.Break();
|
||||
}
|
||||
SetChatState(kChatStateIdle);
|
||||
if (ws_client_ != nullptr) {
|
||||
delete ws_client_;
|
||||
ws_client_ = nullptr;
|
||||
}
|
||||
xEventGroupSetBits(event_group_, DETECTION_RUNNING);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (chat_state_ == kChatStateListening) {
|
||||
// Update the LED state based on the VAD state
|
||||
auto& builtin_led = BuiltinLed::GetInstance();
|
||||
if (res->vad_state == AFE_VAD_SPEECH) {
|
||||
builtin_led.SetRed(128);
|
||||
} else {
|
||||
builtin_led.SetRed(32);
|
||||
}
|
||||
builtin_led.TurnOn();
|
||||
|
||||
// Send audio data to server
|
||||
iovec data = {
|
||||
.iov_base = malloc(res->data_size),
|
||||
.iov_len = (size_t)res->data_size
|
||||
};
|
||||
memcpy(data.iov_base, res->data, res->data_size);
|
||||
xQueueSend(audio_encode_queue_, &data, portMAX_DELAY);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Application::AudioEncodeTask() {
|
||||
ESP_LOGI(TAG, "Audio encode task started");
|
||||
while (true) {
|
||||
iovec pcm;
|
||||
xQueueReceive(audio_encode_queue_, &pcm, portMAX_DELAY);
|
||||
|
||||
// Encode audio data
|
||||
opus_encoder_.Encode(pcm, [this](const iovec opus) {
|
||||
auto protocol = AllocateBinaryProtocol(opus.iov_base, opus.iov_len);
|
||||
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
ws_client_->Send(protocol, sizeof(BinaryProtocol) + opus.iov_len, true);
|
||||
}
|
||||
heap_caps_free(protocol);
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cv_.wait(lock, [this]() {
|
||||
return !audio_encode_queue_.empty() || !audio_decode_queue_.empty();
|
||||
});
|
||||
|
||||
free(pcm.iov_base);
|
||||
}
|
||||
}
|
||||
if (!audio_encode_queue_.empty()) {
|
||||
auto pcm = std::move(audio_encode_queue_.front());
|
||||
audio_encode_queue_.pop_front();
|
||||
lock.unlock();
|
||||
|
||||
void Application::AudioDecodeTask() {
|
||||
while (true) {
|
||||
AudioPacket* packet;
|
||||
xQueueReceive(audio_decode_queue_, &packet, portMAX_DELAY);
|
||||
// Encode audio data
|
||||
opus_encoder_.Encode(pcm, [this](const uint8_t* opus, size_t opus_size) {
|
||||
auto protocol = AllocateBinaryProtocol(opus, opus_size);
|
||||
Schedule([this, protocol, opus_size]() {
|
||||
if (ws_client_ && ws_client_->IsConnected()) {
|
||||
ws_client_->Send(protocol, sizeof(BinaryProtocol) + opus_size, true);
|
||||
}
|
||||
heap_caps_free(protocol);
|
||||
});
|
||||
});
|
||||
} else if (!audio_decode_queue_.empty()) {
|
||||
auto packet = std::move(audio_decode_queue_.front());
|
||||
audio_decode_queue_.pop_front();
|
||||
lock.unlock();
|
||||
|
||||
if (packet->type == kAudioPacketTypeData) {
|
||||
int frame_size = opus_decode_sample_rate_ / 1000 * opus_duration_ms_;
|
||||
packet->pcm.resize(frame_size);
|
||||
|
||||
@ -705,9 +459,74 @@ void Application::AudioDecodeTask() {
|
||||
opus_resampler_.Process(packet->pcm.data(), frame_size, resampled.data());
|
||||
packet->pcm = std::move(resampled);
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_play_queue_.push_back(packet);
|
||||
cv_.notify_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Application::HandleAudioPacket(AudioPacket* packet) {
|
||||
switch (packet->type)
|
||||
{
|
||||
case kAudioPacketTypeData: {
|
||||
if (skip_to_end_) {
|
||||
break;
|
||||
}
|
||||
|
||||
audio_device_.QueueAudioPacket(packet);
|
||||
// This will block until the audio device has finished playing the audio
|
||||
audio_device_.OutputData(packet->pcm);
|
||||
|
||||
if (break_speaking_) {
|
||||
break_speaking_ = false;
|
||||
skip_to_end_ = true;
|
||||
|
||||
// Play a silence and skip to the end
|
||||
int frame_size = opus_decode_sample_rate_ / 1000 * opus_duration_ms_;
|
||||
std::vector<int16_t> silence(frame_size);
|
||||
bzero(silence.data(), silence.size() * sizeof(int16_t));
|
||||
audio_device_.OutputData(silence);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kAudioPacketTypeStart:
|
||||
Schedule([this]() {
|
||||
SetChatState(kChatStateSpeaking);
|
||||
});
|
||||
break;
|
||||
case kAudioPacketTypeStop:
|
||||
skip_to_end_ = false;
|
||||
Schedule([this]() {
|
||||
SetChatState(kChatStateListening);
|
||||
});
|
||||
break;
|
||||
case kAudioPacketTypeSentenceStart:
|
||||
ESP_LOGI(TAG, "<< %s", packet->text.c_str());
|
||||
break;
|
||||
case kAudioPacketTypeSentenceEnd:
|
||||
break;
|
||||
default:
|
||||
ESP_LOGI(TAG, "Unknown packet type: %d", packet->type);
|
||||
break;
|
||||
}
|
||||
|
||||
delete packet;
|
||||
}
|
||||
|
||||
void Application::AudioPlayTask() {
|
||||
ESP_LOGI(TAG, "Audio play task started");
|
||||
|
||||
while (true) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cv_.wait(lock, [this]() {
|
||||
return !audio_play_queue_.empty();
|
||||
});
|
||||
auto packet = std::move(audio_play_queue_.front());
|
||||
audio_play_queue_.pop_front();
|
||||
lock.unlock();
|
||||
|
||||
HandleAudioPacket(packet);
|
||||
}
|
||||
}
|
||||
|
||||
@ -726,6 +545,7 @@ void Application::SetDecodeSampleRate(int sample_rate) {
|
||||
|
||||
void Application::StartWebSocketClient() {
|
||||
if (ws_client_ != nullptr) {
|
||||
ESP_LOGW(TAG, "WebSocket client already exists");
|
||||
delete ws_client_;
|
||||
}
|
||||
|
||||
@ -746,7 +566,6 @@ void Application::StartWebSocketClient() {
|
||||
// keys: message type, version, wakeup_model, audio_params (format, sample_rate, channels)
|
||||
std::string message = "{";
|
||||
message += "\"type\":\"hello\",";
|
||||
message += "\"wakeup_model\":\"" + std::string(wakenet_model_) + "\",";
|
||||
message += "\"audio_params\":{";
|
||||
message += "\"format\":\"opus\", \"sample_rate\":" + std::to_string(CONFIG_AUDIO_INPUT_SAMPLE_RATE) + ", \"channels\":1";
|
||||
message += "}}";
|
||||
@ -763,7 +582,10 @@ void Application::StartWebSocketClient() {
|
||||
auto payload_size = ntohl(protocol->payload_size);
|
||||
packet->opus.resize(payload_size);
|
||||
memcpy(packet->opus.data(), protocol->payload, payload_size);
|
||||
xQueueSend(audio_decode_queue_, &packet, portMAX_DELAY);
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_decode_queue_.push_back(packet);
|
||||
cv_.notify_all();
|
||||
} else {
|
||||
// Parse JSON data
|
||||
auto root = cJSON_Parse(data);
|
||||
@ -786,7 +608,10 @@ void Application::StartWebSocketClient() {
|
||||
packet->type = kAudioPacketTypeSentenceStart;
|
||||
packet->text = cJSON_GetObjectItem(root, "text")->valuestring;
|
||||
}
|
||||
xQueueSend(audio_decode_queue_, &packet, portMAX_DELAY);
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_decode_queue_.push_back(packet);
|
||||
cv_.notify_all();
|
||||
} else if (strcmp(type->valuestring, "stt") == 0) {
|
||||
auto text = cJSON_GetObjectItem(root, "text");
|
||||
if (text != NULL) {
|
||||
@ -804,6 +629,14 @@ void Application::StartWebSocketClient() {
|
||||
|
||||
ws_client_->OnDisconnected([this]() {
|
||||
ESP_LOGI(TAG, "Websocket disconnected");
|
||||
Schedule([this]() {
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
audio_processor_.Stop();
|
||||
#endif
|
||||
delete ws_client_;
|
||||
ws_client_ = nullptr;
|
||||
SetChatState(kChatStateIdle);
|
||||
});
|
||||
});
|
||||
|
||||
if (!ws_client_->Connect(CONFIG_WEBSOCKET_URL)) {
|
||||
|
@ -2,28 +2,33 @@
|
||||
#define _APPLICATION_H_
|
||||
|
||||
#include "AudioDevice.h"
|
||||
#include "OpusEncoder.h"
|
||||
#include "OpusResampler.h"
|
||||
#include "WebSocket.h"
|
||||
#include "Display.h"
|
||||
#include "Ml307AtModem.h"
|
||||
#include "FirmwareUpgrade.h"
|
||||
#include "Ml307Http.h"
|
||||
#include "EspHttp.h"
|
||||
#include <OpusEncoder.h>
|
||||
#include <OpusResampler.h>
|
||||
#include <WebSocket.h>
|
||||
#include <Ml307AtModem.h>
|
||||
#include <Ml307Http.h>
|
||||
#include <EspHttp.h>
|
||||
|
||||
#include "opus.h"
|
||||
#include "resampler_structs.h"
|
||||
#include "freertos/event_groups.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "freertos/task.h"
|
||||
#include "esp_afe_sr_models.h"
|
||||
#include "esp_nsn_models.h"
|
||||
#include <opus.h>
|
||||
#include <resampler_structs.h>
|
||||
#include <freertos/event_groups.h>
|
||||
#include <freertos/task.h>
|
||||
#include <mutex>
|
||||
#include <list>
|
||||
#include <condition_variable>
|
||||
|
||||
#include "Display.h"
|
||||
#include "FirmwareUpgrade.h"
|
||||
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
#include "WakeWordDetect.h"
|
||||
#include "AudioProcessor.h"
|
||||
#endif
|
||||
|
||||
#include "Button.h"
|
||||
|
||||
#define DETECTION_RUNNING 1
|
||||
#define COMMUNICATION_RUNNING 2
|
||||
#define WAKE_WORD_ENCODED 4
|
||||
|
||||
#define PROTOCOL_VERSION 2
|
||||
struct BinaryProtocol {
|
||||
@ -35,6 +40,23 @@ struct BinaryProtocol {
|
||||
uint8_t payload[];
|
||||
} __attribute__((packed));
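
The sender fills this header in network byte order and appends the Opus payload, as AllocateBinaryProtocol and the wake-word encoder do above. A hedged sketch of that framing; the field order comes from the assignments in this diff (version, type, reserved, timestamp, payload_size, payload), while the exact field widths are an assumption inferred from the htons/htonl calls:

#include <arpa/inet.h>   // htons / htonl
#include <cstdint>
#include <cstring>
#include <vector>

// Assumed layout, mirroring the assignments seen in this commit.
struct Frame {
    uint16_t version;
    uint16_t type;
    uint32_t reserved;
    uint32_t timestamp;
    uint32_t payload_size;
    uint8_t payload[];
} __attribute__((packed));

std::vector<uint8_t> PackFrame(uint16_t version, uint32_t timestamp,
                               const uint8_t* opus, size_t opus_size) {
    std::vector<uint8_t> buffer(sizeof(Frame) + opus_size);
    auto* f = reinterpret_cast<Frame*>(buffer.data());
    f->version = htons(version);
    f->type = htons(0);                      // 0 = audio frame, per the calls above
    f->reserved = 0;
    f->timestamp = htonl(timestamp);
    f->payload_size = htonl((uint32_t)opus_size);
    std::memcpy(f->payload, opus, opus_size);
    return buffer;                           // send as one binary websocket message
}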
|
||||
|
||||
enum AudioPacketType {
|
||||
kAudioPacketTypeUnkonwn = 0,
|
||||
kAudioPacketTypeStart,
|
||||
kAudioPacketTypeStop,
|
||||
kAudioPacketTypeData,
|
||||
kAudioPacketTypeSentenceStart,
|
||||
kAudioPacketTypeSentenceEnd
|
||||
};
|
||||
|
||||
struct AudioPacket {
|
||||
AudioPacketType type = kAudioPacketTypeUnkonwn;
|
||||
std::string text;
|
||||
std::vector<uint8_t> opus;
|
||||
std::vector<int16_t> pcm;
|
||||
uint32_t timestamp;
|
||||
};
|
||||
|
||||
|
||||
enum ChatState {
|
||||
kChatStateIdle,
|
||||
@ -42,7 +64,6 @@ enum ChatState {
|
||||
kChatStateListening,
|
||||
kChatStateSpeaking,
|
||||
kChatStateWakeWordDetected,
|
||||
kChatStateTesting,
|
||||
kChatStateUpgrading
|
||||
};
|
||||
|
||||
@ -63,7 +84,12 @@ private:
|
||||
Application();
|
||||
~Application();
|
||||
|
||||
Button button_;
|
||||
AudioDevice audio_device_;
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
WakeWordDetect wake_word_detect_;
|
||||
AudioProcessor audio_processor_;
|
||||
#endif
|
||||
#ifdef CONFIG_USE_ML307
|
||||
Ml307AtModem ml307_at_modem_;
|
||||
Ml307Http http_;
|
||||
@ -74,25 +100,22 @@ private:
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
Display display_;
|
||||
#endif
|
||||
|
||||
std::recursive_mutex mutex_;
|
||||
std::mutex mutex_;
|
||||
std::condition_variable_any cv_;
|
||||
std::list<std::function<void()>> main_tasks_;
|
||||
WebSocket* ws_client_ = nullptr;
|
||||
esp_afe_sr_data_t* afe_detection_data_ = nullptr;
|
||||
esp_afe_sr_data_t* afe_communication_data_ = nullptr;
|
||||
EventGroupHandle_t event_group_;
|
||||
char* wakenet_model_ = NULL;
|
||||
volatile ChatState chat_state_ = kChatStateIdle;
|
||||
volatile bool break_speaking_ = false;
|
||||
bool skip_to_end_ = false;
|
||||
|
||||
// Audio encode / decode
|
||||
TaskHandle_t audio_feed_task_ = nullptr;
|
||||
TaskHandle_t audio_encode_task_ = nullptr;
|
||||
StaticTask_t audio_encode_task_buffer_;
|
||||
StackType_t* audio_encode_task_stack_ = nullptr;
|
||||
QueueHandle_t audio_encode_queue_ = nullptr;
|
||||
|
||||
TaskHandle_t audio_decode_task_ = nullptr;
|
||||
StaticTask_t audio_decode_task_buffer_;
|
||||
StackType_t* audio_decode_task_stack_ = nullptr;
|
||||
QueueHandle_t audio_decode_queue_ = nullptr;
|
||||
std::list<std::vector<int16_t>> audio_encode_queue_;
|
||||
std::list<AudioPacket*> audio_decode_queue_;
|
||||
std::list<AudioPacket*> audio_play_queue_;
|
||||
|
||||
OpusEncoder opus_encoder_;
|
||||
OpusDecoder* opus_decoder_ = nullptr;
|
||||
@ -100,38 +123,23 @@ private:
|
||||
int opus_duration_ms_ = 60;
|
||||
int opus_decode_sample_rate_ = CONFIG_AUDIO_OUTPUT_SAMPLE_RATE;
|
||||
OpusResampler opus_resampler_;
|
||||
OpusResampler test_resampler_;
|
||||
std::vector<iovec> test_pcm_;
|
||||
|
||||
TaskHandle_t wake_word_encode_task_ = nullptr;
|
||||
StaticTask_t wake_word_encode_task_buffer_;
|
||||
StackType_t* wake_word_encode_task_stack_ = nullptr;
|
||||
std::list<iovec> wake_word_pcm_;
|
||||
std::string wake_word_opus_;
|
||||
|
||||
TaskHandle_t check_new_version_task_ = nullptr;
|
||||
StaticTask_t check_new_version_task_buffer_;
|
||||
StackType_t* check_new_version_task_stack_ = nullptr;
|
||||
|
||||
BinaryProtocol* AllocateBinaryProtocol(void* payload, size_t payload_size);
|
||||
void MainLoop();
|
||||
void Schedule(std::function<void()> callback);
|
||||
BinaryProtocol* AllocateBinaryProtocol(const uint8_t* payload, size_t payload_size);
|
||||
void SetDecodeSampleRate(int sample_rate);
|
||||
void SetChatState(ChatState state);
|
||||
void StartDetection();
|
||||
void StartCommunication();
|
||||
void StartWebSocketClient();
|
||||
void StoreWakeWordData(uint8_t* data, size_t size);
|
||||
void EncodeWakeWordData();
|
||||
void SendWakeWordData();
|
||||
void CheckTestButton();
|
||||
void PlayTestAudio();
|
||||
void CheckNewVersion();
|
||||
void UpdateDisplay();
|
||||
|
||||
void AudioFeedTask();
|
||||
void AudioDetectionTask();
|
||||
void AudioCommunicationTask();
|
||||
|
||||
void AudioEncodeTask();
|
||||
void AudioDecodeTask();
|
||||
void AudioPlayTask();
|
||||
void HandleAudioPacket(AudioPacket* packet);
|
||||
};
|
||||
|
||||
#endif // _APPLICATION_H_
|
||||
|
@ -1,18 +1,15 @@
|
||||
#include "AudioDevice.h"
|
||||
#include "esp_log.h"
|
||||
#include <esp_log.h>
|
||||
#include <cstring>
|
||||
|
||||
#define TAG "AudioDevice"
|
||||
|
||||
AudioDevice::AudioDevice() {
|
||||
audio_play_queue_ = xQueueCreate(100, sizeof(AudioPacket*));
|
||||
}
|
||||
|
||||
AudioDevice::~AudioDevice() {
|
||||
vQueueDelete(audio_play_queue_);
|
||||
|
||||
if (audio_play_task_ != nullptr) {
|
||||
vTaskDelete(audio_play_task_);
|
||||
if (audio_input_task_ != nullptr) {
|
||||
vTaskDelete(audio_input_task_);
|
||||
}
|
||||
if (rx_handle_ != nullptr) {
|
||||
ESP_ERROR_CHECK(i2s_channel_disable(rx_handle_));
|
||||
@ -37,8 +34,8 @@ void AudioDevice::Start(int input_sample_rate, int output_sample_rate) {
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
auto audio_device = (AudioDevice*)arg;
|
||||
audio_device->AudioPlayTask();
|
||||
}, "audio_play", 4096 * 4, this, 5, &audio_play_task_);
|
||||
audio_device->InputTask();
|
||||
}, "audio_input", 4096 * 2, this, 5, &audio_input_task_);
|
||||
}
|
||||
|
||||
void AudioDevice::CreateDuplexChannels() {
|
||||
@ -180,58 +177,22 @@ int AudioDevice::Read(int16_t* dest, int samples) {
|
||||
return samples;
|
||||
}
|
||||
|
||||
void AudioDevice::QueueAudioPacket(AudioPacket* packet) {
|
||||
xQueueSend(audio_play_queue_, &packet, portMAX_DELAY);
|
||||
void AudioDevice::OnInputData(std::function<void(const int16_t*, int)> callback) {
|
||||
on_input_data_ = callback;
|
||||
}
|
||||
|
||||
void AudioDevice::AudioPlayTask() {
|
||||
while (true) {
|
||||
AudioPacket* packet;
|
||||
xQueueReceive(audio_play_queue_, &packet, portMAX_DELAY);
|
||||
void AudioDevice::OutputData(std::vector<int16_t>& data) {
|
||||
Write(data.data(), data.size());
|
||||
}
|
||||
|
||||
switch (packet->type)
|
||||
{
|
||||
case kAudioPacketTypeStart:
|
||||
playing_ = true;
|
||||
breaked_ = false;
|
||||
if (on_state_changed_) {
|
||||
on_state_changed_();
|
||||
}
|
||||
break;
|
||||
case kAudioPacketTypeStop:
|
||||
playing_ = false;
|
||||
if (on_state_changed_) {
|
||||
on_state_changed_();
|
||||
}
|
||||
break;
|
||||
case kAudioPacketTypeSentenceStart:
|
||||
ESP_LOGI(TAG, "<< %s", packet->text.c_str());
|
||||
break;
|
||||
case kAudioPacketTypeSentenceEnd:
|
||||
if (breaked_) { // Clear the queue
|
||||
AudioPacket* p;
|
||||
while (xQueueReceive(audio_play_queue_, &p, 0) == pdTRUE) {
|
||||
delete p;
|
||||
}
|
||||
breaked_ = false;
|
||||
playing_ = false;
|
||||
}
|
||||
break;
|
||||
case kAudioPacketTypeData:
|
||||
Write(packet->pcm.data(), packet->pcm.size());
|
||||
last_timestamp_ = packet->timestamp;
|
||||
break;
|
||||
default:
|
||||
ESP_LOGE(TAG, "Unknown audio packet type: %d", packet->type);
|
||||
void AudioDevice::InputTask() {
|
||||
int duration = 30;
|
||||
int input_frame_size = input_sample_rate_ / 1000 * duration;
|
||||
int16_t input_buffer[input_frame_size];
|
||||
while (true) {
|
||||
int samples = Read(input_buffer, input_frame_size);
|
||||
if (samples > 0) {
|
||||
on_input_data_(input_buffer, samples);
|
||||
}
|
||||
delete packet;
|
||||
}
|
||||
}
|
||||
|
||||
void AudioDevice::OnStateChanged(std::function<void()> callback) {
|
||||
on_state_changed_ = callback;
|
||||
}
|
||||
|
||||
void AudioDevice::Break() {
|
||||
breaked_ = true;
|
||||
}
|
||||
|
@ -1,71 +1,44 @@
|
||||
#ifndef _AUDIO_DEVICE_H
|
||||
#define _AUDIO_DEVICE_H
|
||||
|
||||
#include "opus.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "freertos/event_groups.h"
|
||||
#include "driver/i2s_std.h"
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/event_groups.h>
|
||||
#include <driver/i2s_std.h>
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
|
||||
enum AudioPacketType {
|
||||
kAudioPacketTypeUnkonwn = 0,
|
||||
kAudioPacketTypeStart,
|
||||
kAudioPacketTypeStop,
|
||||
kAudioPacketTypeData,
|
||||
kAudioPacketTypeSentenceStart,
|
||||
kAudioPacketTypeSentenceEnd
|
||||
};
|
||||
|
||||
struct AudioPacket {
|
||||
AudioPacketType type = kAudioPacketTypeUnkonwn;
|
||||
std::string text;
|
||||
std::vector<uint8_t> opus;
|
||||
std::vector<int16_t> pcm;
|
||||
uint32_t timestamp;
|
||||
};
|
||||
|
||||
class AudioDevice {
|
||||
public:
|
||||
AudioDevice();
|
||||
~AudioDevice();
|
||||
|
||||
void Start(int input_sample_rate, int output_sample_rate);
|
||||
int Read(int16_t* dest, int samples);
|
||||
void Write(const int16_t* data, int samples);
|
||||
void QueueAudioPacket(AudioPacket* packet);
|
||||
void OnStateChanged(std::function<void()> callback);
|
||||
void Break();
|
||||
void OnInputData(std::function<void(const int16_t*, int)> callback);
|
||||
void OutputData(std::vector<int16_t>& data);
|
||||
|
||||
int input_sample_rate() const { return input_sample_rate_; }
|
||||
int output_sample_rate() const { return output_sample_rate_; }
|
||||
bool duplex() const { return duplex_; }
|
||||
bool playing() const { return playing_; }
|
||||
uint32_t last_timestamp() const { return last_timestamp_; }
|
||||
|
||||
private:
|
||||
bool playing_ = false;
|
||||
bool breaked_ = false;
|
||||
bool duplex_ = false;
|
||||
int input_sample_rate_ = 0;
|
||||
int output_sample_rate_ = 0;
|
||||
uint32_t last_timestamp_ = 0;
|
||||
|
||||
i2s_chan_handle_t tx_handle_ = nullptr;
|
||||
i2s_chan_handle_t rx_handle_ = nullptr;
|
||||
|
||||
QueueHandle_t audio_play_queue_ = nullptr;
|
||||
TaskHandle_t audio_play_task_ = nullptr;
|
||||
TaskHandle_t audio_input_task_ = nullptr;
|
||||
|
||||
EventGroupHandle_t event_group_;
|
||||
std::function<void()> on_state_changed_;
|
||||
std::function<void(const int16_t*, int)> on_input_data_;
|
||||
|
||||
void CreateDuplexChannels();
|
||||
void CreateSimplexChannels();
|
||||
void AudioPlayTask();
|
||||
void InputTask();
|
||||
int Read(int16_t* dest, int samples);
|
||||
void Write(const int16_t* data, int samples);
|
||||
};
|
||||
|
||||
#endif // _AUDIO_DEVICE_H
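
Taken together, the reworked AudioDevice is a thin duplex I2S wrapper: Start() opens the channels and spawns the input task, OnInputData() registers the microphone callback (register it before Start(), since the input task calls it unconditionally), and OutputData() writes PCM to the speaker. Typical usage might look like this sketch; the sample-rate constants come from the project's Kconfig:

#include "AudioDevice.h"
#include <vector>

void ExampleAudioSetup(AudioDevice& audio) {
    audio.OnInputData([](const int16_t* pcm, int samples) {
        // microphone frames land here (about 30 ms per callback in this commit)
    });
    audio.Start(CONFIG_AUDIO_INPUT_SAMPLE_RATE, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);

    std::vector<int16_t> pcm_out(480, 0);   // a short buffer of silence
    audio.OutputData(pcm_out);              // blocks until written to the I2S driver
}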
|
||||
|
main/AudioProcessor.cc (new file, 106 lines)
@@ -0,0 +1,106 @@
|
||||
#include "AudioProcessor.h"
|
||||
#include <esp_log.h>
|
||||
|
||||
#define PROCESSOR_RUNNING 0x01
|
||||
|
||||
static const char* TAG = "AudioProcessor";
|
||||
|
||||
AudioProcessor::AudioProcessor()
|
||||
: afe_communication_data_(nullptr) {
|
||||
event_group_ = xEventGroupCreate();
|
||||
|
||||
afe_config_t afe_config = {
|
||||
.aec_init = false,
|
||||
.se_init = true,
|
||||
.vad_init = false,
|
||||
.wakenet_init = false,
|
||||
.voice_communication_init = true,
|
||||
.voice_communication_agc_init = true,
|
||||
.voice_communication_agc_gain = 10,
|
||||
.vad_mode = VAD_MODE_3,
|
||||
.wakenet_model_name = NULL,
|
||||
.wakenet_model_name_2 = NULL,
|
||||
.wakenet_mode = DET_MODE_90,
|
||||
.afe_mode = SR_MODE_HIGH_PERF,
|
||||
.afe_perferred_core = 0,
|
||||
.afe_perferred_priority = 5,
|
||||
.afe_ringbuf_size = 50,
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||
.afe_linear_gain = 1.0,
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||
.pcm_config = {
|
||||
.total_ch_num = 1,
|
||||
.mic_num = 1,
|
||||
.ref_num = 0,
|
||||
.sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE,
|
||||
},
|
||||
.debug_init = false,
|
||||
.debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }},
|
||||
.afe_ns_mode = NS_MODE_SSP,
|
||||
.afe_ns_model_name = NULL,
|
||||
.fixed_first_channel = true,
|
||||
};
|
||||
|
||||
afe_communication_data_ = esp_afe_vc_v1.create_from_config(&afe_config);
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
auto this_ = (AudioProcessor*)arg;
|
||||
this_->AudioProcessorTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_communication", 4096 * 2, this, 5, NULL);
|
||||
}
|
||||
|
||||
AudioProcessor::~AudioProcessor() {
|
||||
if (afe_communication_data_ != nullptr) {
|
||||
esp_afe_vc_v1.destroy(afe_communication_data_);
|
||||
}
|
||||
vEventGroupDelete(event_group_);
|
||||
}
|
||||
|
||||
void AudioProcessor::Input(const int16_t* data, int size) {
|
||||
input_buffer_.insert(input_buffer_.end(), data, data + size);
|
||||
|
||||
auto chunk_size = esp_afe_vc_v1.get_feed_chunksize(afe_communication_data_);
|
||||
while (input_buffer_.size() >= chunk_size) {
|
||||
auto chunk = input_buffer_.data();
|
||||
esp_afe_vc_v1.feed(afe_communication_data_, chunk);
|
||||
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunk_size);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioProcessor::Start() {
|
||||
xEventGroupSetBits(event_group_, PROCESSOR_RUNNING);
|
||||
}
|
||||
|
||||
void AudioProcessor::Stop() {
|
||||
xEventGroupClearBits(event_group_, PROCESSOR_RUNNING);
|
||||
}
|
||||
|
||||
bool AudioProcessor::IsRunning() {
|
||||
return xEventGroupGetBits(event_group_) & PROCESSOR_RUNNING;
|
||||
}
|
||||
|
||||
void AudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
|
||||
output_callback_ = callback;
|
||||
}
|
||||
|
||||
void AudioProcessor::AudioProcessorTask() {
|
||||
int chunk_size = esp_afe_vc_v1.get_fetch_chunksize(afe_communication_data_);
|
||||
ESP_LOGI(TAG, "Audio communication task started, chunk size: %d", chunk_size);
|
||||
|
||||
while (true) {
|
||||
xEventGroupWaitBits(event_group_, PROCESSOR_RUNNING, pdFALSE, pdTRUE, portMAX_DELAY);
|
||||
|
||||
auto res = esp_afe_vc_v1.fetch(afe_communication_data_);
|
||||
if (res == nullptr || res->ret_value == ESP_FAIL) {
|
||||
if (res != nullptr) {
|
||||
ESP_LOGI(TAG, "Error code: %d", res->ret_value);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (output_callback_) {
|
||||
output_callback_(std::vector<int16_t>(res->data, res->data + res->data_size / sizeof(int16_t)));
|
||||
}
|
||||
}
|
||||
}
|
main/AudioProcessor.h (new file, 33 lines)
@@ -0,0 +1,33 @@
|
||||
#ifndef AUDIO_PROCESSOR_H
|
||||
#define AUDIO_PROCESSOR_H
|
||||
|
||||
#include <esp_afe_sr_models.h>
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/task.h>
|
||||
#include <freertos/event_groups.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
class AudioProcessor {
|
||||
public:
|
||||
AudioProcessor();
|
||||
~AudioProcessor();
|
||||
|
||||
void Input(const int16_t* data, int size);
|
||||
void Start();
|
||||
void Stop();
|
||||
bool IsRunning();
|
||||
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback);
|
||||
|
||||
private:
|
||||
EventGroupHandle_t event_group_ = nullptr;
|
||||
esp_afe_sr_data_t* afe_communication_data_ = nullptr;
|
||||
std::vector<int16_t> input_buffer_;
|
||||
std::function<void(std::vector<int16_t>&& data)> output_callback_;
|
||||
|
||||
void AudioProcessorTask();
|
||||
};
|
||||
|
||||
#endif
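
AudioProcessor wraps the AFE voice-communication pipeline: Input() buffers raw PCM and feeds it in AFE-sized chunks, Start()/Stop() gate the fetch task through an event group, and OnOutput() delivers the processed frames. A usage sketch mirroring how Application wires it up above:

#include "AudioProcessor.h"

void ExampleProcessorSetup(AudioProcessor& processor) {
    processor.OnOutput([](std::vector<int16_t>&& frame) {
        // cleaned-up mono frames at the configured input rate, ready to Opus-encode
    });
    processor.Start();
    // from the audio input callback, while running:
    // processor.Input(pcm, samples);
}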
|
main/Button.cc (new file, 67 lines)
@@ -0,0 +1,67 @@
|
||||
#include "Button.h"
|
||||
#include <esp_log.h>
|
||||
|
||||
static const char* TAG = "Button";
|
||||
|
||||
Button::Button(gpio_num_t gpio_num) : gpio_num_(gpio_num) {
|
||||
button_config_t button_config = {
|
||||
.type = BUTTON_TYPE_GPIO,
|
||||
.long_press_time = 3000,
|
||||
.short_press_time = 100,
|
||||
.gpio_button_config = {
|
||||
.gpio_num = gpio_num,
|
||||
.active_level = 0
|
||||
}
|
||||
};
|
||||
button_handle_ = iot_button_create(&button_config);
|
||||
if (button_handle_ == NULL) {
|
||||
ESP_LOGE(TAG, "Failed to create button handle");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Button::~Button() {
|
||||
if (button_handle_ != NULL) {
|
||||
iot_button_delete(button_handle_);
|
||||
}
|
||||
}
|
||||
|
||||
void Button::OnPress(std::function<void()> callback) {
|
||||
on_press_ = callback;
|
||||
iot_button_register_cb(button_handle_, BUTTON_PRESS_DOWN, [](void* handle, void* usr_data) {
|
||||
Button* button = static_cast<Button*>(usr_data);
|
||||
if (button->on_press_) {
|
||||
button->on_press_();
|
||||
}
|
||||
}, this);
|
||||
}
|
||||
|
||||
void Button::OnLongPress(std::function<void()> callback) {
|
||||
on_long_press_ = callback;
|
||||
iot_button_register_cb(button_handle_, BUTTON_LONG_PRESS_START, [](void* handle, void* usr_data) {
|
||||
Button* button = static_cast<Button*>(usr_data);
|
||||
if (button->on_long_press_) {
|
||||
button->on_long_press_();
|
||||
}
|
||||
}, this);
|
||||
}
|
||||
|
||||
void Button::OnClick(std::function<void()> callback) {
|
||||
on_click_ = callback;
|
||||
iot_button_register_cb(button_handle_, BUTTON_SINGLE_CLICK, [](void* handle, void* usr_data) {
|
||||
Button* button = static_cast<Button*>(usr_data);
|
||||
if (button->on_click_) {
|
||||
button->on_click_();
|
||||
}
|
||||
}, this);
|
||||
}
|
||||
|
||||
void Button::OnDoubleClick(std::function<void()> callback) {
|
||||
on_double_click_ = callback;
|
||||
iot_button_register_cb(button_handle_, BUTTON_DOUBLE_CLICK, [](void* handle, void* usr_data) {
|
||||
Button* button = static_cast<Button*>(usr_data);
|
||||
if (button->on_double_click_) {
|
||||
button->on_double_click_();
|
||||
}
|
||||
}, this);
|
||||
}
|
main/Button.h (new file, 28 lines)
@@ -0,0 +1,28 @@
|
||||
#ifndef BUTTON_H_
|
||||
#define BUTTON_H_
|
||||
|
||||
#include <driver/gpio.h>
|
||||
#include <iot_button.h>
|
||||
#include <functional>
|
||||
|
||||
class Button {
|
||||
public:
|
||||
Button(gpio_num_t gpio_num);
|
||||
~Button();
|
||||
|
||||
void OnPress(std::function<void()> callback);
|
||||
void OnLongPress(std::function<void()> callback);
|
||||
void OnClick(std::function<void()> callback);
|
||||
void OnDoubleClick(std::function<void()> callback);
|
||||
private:
|
||||
gpio_num_t gpio_num_;
|
||||
button_handle_t button_handle_;
|
||||
|
||||
|
||||
std::function<void()> on_press_;
|
||||
std::function<void()> on_long_press_;
|
||||
std::function<void()> on_click_;
|
||||
std::function<void()> on_double_click_;
|
||||
};
|
||||
|
||||
#endif // BUTTON_H_
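
Button wraps the iot_button component and exposes std::function callbacks for press, long press, single click, and double click. Usage is one line per event, as a sketch (CONFIG_BOOT_BUTTON_GPIO is how Application constructs it above):

#include "Button.h"

void ExampleButtonSetup(Button& boot_button) {
    boot_button.OnClick([]() {
        // single click: start or stop a conversation, as Application::Start() wires it
    });
    boot_button.OnLongPress([]() {
        // application-defined long-press action
    });
}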
|
@ -4,9 +4,14 @@ set(SOURCES "AudioDevice.cc"
|
||||
"SystemReset.cc"
|
||||
"Application.cc"
|
||||
"Display.cc"
|
||||
"Button.cc"
|
||||
"main.cc"
|
||||
)
|
||||
|
||||
if(CONFIG_USE_AFE_SR)
|
||||
list(APPEND SOURCES "AudioProcessor.cc" "WakeWordDetect.cc")
|
||||
endif()
|
||||
|
||||
idf_component_register(SRCS ${SOURCES}
|
||||
INCLUDE_DIRS "."
|
||||
)
|
||||
|
@ -1,11 +1,11 @@
|
||||
|
||||
#include "Display.h"
|
||||
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_lcd_panel_ops.h"
|
||||
#include "esp_lcd_panel_vendor.h"
|
||||
#include "esp_lvgl_port.h"
|
||||
#include <esp_log.h>
|
||||
#include <esp_err.h>
|
||||
#include <esp_lcd_panel_ops.h>
|
||||
#include <esp_lcd_panel_vendor.h>
|
||||
#include <esp_lvgl_port.h>
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
#ifndef DISPLAY_H
|
||||
#define DISPLAY_H
|
||||
|
||||
#include "driver/i2c_master.h"
|
||||
#include "esp_lcd_panel_io.h"
|
||||
#include "esp_lcd_panel_ops.h"
|
||||
#include "lvgl.h"
|
||||
#include <driver/i2c_master.h>
|
||||
#include <esp_lcd_panel_io.h>
|
||||
#include <esp_lcd_panel_ops.h>
|
||||
#include <lvgl.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
#include "FirmwareUpgrade.h"
|
||||
#include "SystemInfo.h"
|
||||
#include "cJSON.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_partition.h"
|
||||
#include "esp_http_client.h"
|
||||
#include "esp_ota_ops.h"
|
||||
#include "esp_app_format.h"
|
||||
#include "Ml307Http.h"
|
||||
#include <cJSON.h>
|
||||
#include <esp_log.h>
|
||||
#include <esp_partition.h>
|
||||
#include <esp_http_client.h>
|
||||
#include <esp_ota_ops.h>
|
||||
#include <esp_app_format.h>
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
@ -4,7 +4,8 @@
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "Http.h"
|
||||
|
||||
#include <Http.h>
|
||||
|
||||
class FirmwareUpgrade {
|
||||
public:
|
||||
|
@ -74,6 +74,18 @@ config AUDIO_DEVICE_I2S_SPK_GPIO_WS
|
||||
help
|
||||
GPIO number of the I2S MIC WS.
|
||||
|
||||
config BOOT_BUTTON_GPIO
|
||||
int "Boot Button GPIO"
|
||||
default 0
|
||||
help
|
||||
GPIO number of the boot button.
|
||||
|
||||
config USE_AFE_SR
|
||||
bool "Use Espressif AFE SR"
|
||||
default y
|
||||
help
|
||||
Use AFE SR for wake word detection.
|
||||
|
||||
config USE_ML307
|
||||
bool "Use ML307"
|
||||
default n
|
||||
|
@ -1,15 +1,13 @@
|
||||
#include "SystemInfo.h"
|
||||
#include "freertos/task.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_flash.h"
|
||||
#include "esp_mac.h"
|
||||
#include "esp_chip_info.h"
|
||||
#include "esp_system.h"
|
||||
#include "esp_partition.h"
|
||||
#include "esp_app_desc.h"
|
||||
#include "esp_psram.h"
|
||||
#include "esp_wifi.h"
|
||||
#include "esp_ota_ops.h"
|
||||
#include <freertos/task.h>
|
||||
#include <esp_log.h>
|
||||
#include <esp_flash.h>
|
||||
#include <esp_mac.h>
|
||||
#include <esp_chip_info.h>
|
||||
#include <esp_system.h>
|
||||
#include <esp_partition.h>
|
||||
#include <esp_app_desc.h>
|
||||
#include <esp_ota_ops.h>
|
||||
|
||||
|
||||
#define TAG "SystemInfo"
|
||||
@ -80,7 +78,6 @@ std::string SystemInfo::GetJsonString() {
|
||||
*/
|
||||
std::string json = "{";
|
||||
json += "\"flash_size\":" + std::to_string(GetFlashSize()) + ",";
|
||||
json += "\"psram_size\":" + std::to_string(esp_psram_get_size()) + ",";
|
||||
json += "\"minimum_free_heap_size\":" + std::to_string(GetMinimumFreeHeapSize()) + ",";
|
||||
json += "\"mac_address\":\"" + GetMacAddress() + "\",";
|
||||
json += "\"chip_model_name\":\"" + GetChipModelName() + "\",";
|
||||
|
@ -3,8 +3,8 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "esp_err.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include <esp_err.h>
|
||||
#include <freertos/FreeRTOS.h>
|
||||
|
||||
class SystemInfo {
|
||||
public:
|
||||
|
@ -1,10 +1,10 @@
|
||||
#include "SystemReset.h"
|
||||
#include "esp_log.h"
|
||||
#include "nvs_flash.h"
|
||||
#include "driver/gpio.h"
|
||||
#include "esp_partition.h"
|
||||
#include "esp_system.h"
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include <esp_log.h>
|
||||
#include <nvs_flash.h>
|
||||
#include <driver/gpio.h>
|
||||
#include <esp_partition.h>
|
||||
#include <esp_system.h>
|
||||
#include <freertos/FreeRTOS.h>
|
||||
|
||||
|
||||
#define TAG "SystemReset"
|
||||
|
main/WakeWordDetect.cc (new file, 203 lines)
@@ -0,0 +1,203 @@
|
||||
#include <esp_log.h>
|
||||
#include <model_path.h>
|
||||
|
||||
#include "WakeWordDetect.h"
|
||||
#include "Application.h"
|
||||
|
||||
#define DETECTION_RUNNING_EVENT 1
|
||||
#define WAKE_WORD_ENCODED_EVENT 2
|
||||
|
||||
static const char* TAG = "WakeWordDetect";
|
||||
|
||||
WakeWordDetect::WakeWordDetect()
|
||||
: afe_detection_data_(nullptr),
|
||||
wake_word_pcm_(),
|
||||
wake_word_opus_() {
|
||||
|
||||
event_group_ = xEventGroupCreate();
|
||||
|
||||
srmodel_list_t *models = esp_srmodel_init("model");
|
||||
for (int i = 0; i < models->num; i++) {
|
||||
ESP_LOGI(TAG, "Model %d: %s", i, models->model_name[i]);
|
||||
if (strstr(models->model_name[i], ESP_WN_PREFIX) != NULL) {
|
||||
wakenet_model_ = models->model_name[i];
|
||||
}
|
||||
}
|
||||
|
||||
afe_config_t afe_config = {
|
||||
.aec_init = false,
|
||||
.se_init = true,
|
||||
.vad_init = true,
|
||||
.wakenet_init = true,
|
||||
.voice_communication_init = false,
|
||||
.voice_communication_agc_init = false,
|
||||
.voice_communication_agc_gain = 10,
|
||||
.vad_mode = VAD_MODE_3,
|
||||
.wakenet_model_name = wakenet_model_,
|
||||
.wakenet_model_name_2 = NULL,
|
||||
.wakenet_mode = DET_MODE_90,
|
||||
.afe_mode = SR_MODE_HIGH_PERF,
|
||||
.afe_perferred_core = 0,
|
||||
.afe_perferred_priority = 5,
|
||||
.afe_ringbuf_size = 50,
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||
.afe_linear_gain = 1.0,
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||
.pcm_config = {
|
||||
.total_ch_num = 1,
|
||||
.mic_num = 1,
|
||||
.ref_num = 0,
|
||||
.sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE
|
||||
},
|
||||
.debug_init = false,
|
||||
.debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }},
|
||||
.afe_ns_mode = NS_MODE_SSP,
|
||||
.afe_ns_model_name = NULL,
|
||||
.fixed_first_channel = true,
|
||||
};
|
||||
|
||||
afe_detection_data_ = esp_afe_sr_v1.create_from_config(&afe_config);
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
auto this_ = (WakeWordDetect*)arg;
|
||||
this_->AudioDetectionTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_detection", 4096 * 2, this, 5, NULL);
|
||||
}

WakeWordDetect::~WakeWordDetect() {
    if (afe_detection_data_ != nullptr) {
        esp_afe_sr_v1.destroy(afe_detection_data_);
    }

    if (wake_word_encode_task_stack_ != nullptr) {
        free(wake_word_encode_task_stack_);
    }

    vEventGroupDelete(event_group_);
}

void WakeWordDetect::OnWakeWordDetected(std::function<void()> callback) {
    wake_word_detected_callback_ = callback;
}

void WakeWordDetect::OnVadStateChange(std::function<void(bool speaking)> callback) {
    vad_state_change_callback_ = callback;
}

void WakeWordDetect::StartDetection() {
    xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
}

void WakeWordDetect::StopDetection() {
    xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
}

bool WakeWordDetect::IsDetectionRunning() {
    return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
}

void WakeWordDetect::Feed(const int16_t* data, int size) {
    input_buffer_.insert(input_buffer_.end(), data, data + size);

    auto chunk_size = esp_afe_sr_v1.get_feed_chunksize(afe_detection_data_);
    while (input_buffer_.size() >= chunk_size) {
        esp_afe_sr_v1.feed(afe_detection_data_, input_buffer_.data());
        input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunk_size);
    }
}

void WakeWordDetect::AudioDetectionTask() {
    auto chunk_size = esp_afe_sr_v1.get_fetch_chunksize(afe_detection_data_);
    ESP_LOGI(TAG, "Audio detection task started, chunk size: %d", chunk_size);

    while (true) {
        xEventGroupWaitBits(event_group_, DETECTION_RUNNING_EVENT, pdFALSE, pdTRUE, portMAX_DELAY);

        auto res = esp_afe_sr_v1.fetch(afe_detection_data_);
        if (res == nullptr || res->ret_value == ESP_FAIL) {
            if (res != nullptr) {
                ESP_LOGI(TAG, "Error code: %d", res->ret_value);
            }
            continue;
        }

        // Store the wake word audio for later voice recognition, such as identifying who is speaking
        StoreWakeWordData((uint16_t*)res->data, res->data_size / sizeof(uint16_t));

        // VAD state change
        if (vad_state_change_callback_) {
            if (res->vad_state == AFE_VAD_SPEECH && !is_speaking_) {
                is_speaking_ = true;
                vad_state_change_callback_(true);
            } else if (res->vad_state == AFE_VAD_SILENCE && is_speaking_) {
                is_speaking_ = false;
                vad_state_change_callback_(false);
            }
        }

        if (res->wakeup_state == WAKENET_DETECTED) {
            ESP_LOGI(TAG, "Wake word detected");
            StopDetection();

            if (wake_word_detected_callback_) {
                wake_word_detected_callback_();
            }
        }
    }
}

void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
    // Store the audio data into wake_word_pcm_
    std::vector<int16_t> pcm(data, data + samples);
    wake_word_pcm_.emplace_back(std::move(pcm));
    // Keep about 2 seconds of data; each fetch chunk covers 32 ms (sample_rate == 16000, chunk_size == 512)
    while (wake_word_pcm_.size() > 2000 / 32) {
        wake_word_pcm_.pop_front();
    }
}

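For reference, the retention bound above works out to just under two seconds. A minimal compile-time sketch of that arithmetic (the sample rate and chunk size are taken from the comment above, not from anything else in this diff):

#include <cstddef>

// Sketch only: checks the "about 2 seconds" bound used in StoreWakeWordData,
// assuming 16 kHz input and 512-sample AFE fetch chunks as the comment states.
constexpr int kSampleRateHz = 16000;
constexpr int kChunkSamples = 512;
constexpr int kChunkMs = kChunkSamples * 1000 / kSampleRateHz;   // = 32 ms per chunk
constexpr std::size_t kMaxChunks = 2000 / kChunkMs;              // = 62 chunks kept
static_assert(kChunkMs == 32, "one AFE chunk spans 32 ms");
static_assert(kMaxChunks == 62, "62 chunks * 32 ms is just under 2 s of audio");
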
void WakeWordDetect::EncodeWakeWordData() {
    if (wake_word_encode_task_stack_ == nullptr) {
        wake_word_encode_task_stack_ = (StackType_t*)malloc(4096 * 8);
    }
    wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
        auto this_ = (WakeWordDetect*)arg;
        auto start_time = esp_timer_get_time();
        // encode detect packets
        OpusEncoder* encoder = new OpusEncoder();
        encoder->Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1, 60);
        encoder->SetComplexity(0);
        this_->wake_word_opus_.resize(4096 * 4);
        size_t offset = 0;

        for (auto& pcm: this_->wake_word_pcm_) {
            encoder->Encode(pcm, [this_, &offset](const uint8_t* opus, size_t opus_size) {
                size_t protocol_size = sizeof(BinaryProtocol) + opus_size;
                if (offset + protocol_size < this_->wake_word_opus_.size()) {
                    auto protocol = (BinaryProtocol*)(&this_->wake_word_opus_[offset]);
                    protocol->version = htons(PROTOCOL_VERSION);
                    protocol->type = htons(0);
                    protocol->reserved = 0;
                    protocol->timestamp = 0;
                    protocol->payload_size = htonl(opus_size);
                    memcpy(protocol->payload, opus, opus_size);
                    offset += protocol_size;
                }
            });
        }
        this_->wake_word_pcm_.clear();
        this_->wake_word_opus_.resize(offset);

        auto end_time = esp_timer_get_time();
        ESP_LOGI(TAG, "Encode wake word opus: %zu bytes in %lld ms", this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
        xEventGroupSetBits(this_->event_group_, WAKE_WORD_ENCODED_EVENT);
        delete encoder;
        vTaskDelete(NULL);
    }, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
}

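The loop above writes each Opus frame behind a BinaryProtocol header that is defined elsewhere in the project and not shown in this diff. Purely as an illustration, a packed layout compatible with the fields and byte-order conversions used above might look like the following; the field widths are assumptions, not the project's actual definition:

#include <cstdint>

// Hypothetical reconstruction for illustration only -- the real struct lives elsewhere in the repo.
struct __attribute__((packed)) BinaryProtocol {
    uint16_t version;       // htons(PROTOCOL_VERSION)
    uint16_t type;          // htons(0) for audio frames
    uint32_t reserved;      // written as 0 above
    uint32_t timestamp;     // written as 0 above
    uint32_t payload_size;  // htonl(opus_size), i.e. big-endian payload length
    uint8_t  payload[];     // opus_size bytes of encoded Opus data follow
};
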
const std::string&& WakeWordDetect::GetWakeWordStream() {
|
||||
xEventGroupWaitBits(event_group_, WAKE_WORD_ENCODED_EVENT, pdTRUE, pdTRUE, portMAX_DELAY);
|
||||
return std::move(wake_word_opus_);
|
||||
}
|
50 main/WakeWordDetect.h Normal file
@ -0,0 +1,50 @@
#ifndef WAKE_WORD_DETECT_H
#define WAKE_WORD_DETECT_H

#include <esp_afe_sr_models.h>
#include <esp_nsn_models.h>

#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>

#include <list>
#include <string>
#include <vector>
#include <functional>


class WakeWordDetect {
public:
    WakeWordDetect();
    ~WakeWordDetect();

    void Feed(const int16_t* data, int size);
    void OnWakeWordDetected(std::function<void()> callback);
    void OnVadStateChange(std::function<void(bool speaking)> callback);
    void StartDetection();
    void StopDetection();
    bool IsDetectionRunning();
    void EncodeWakeWordData();
    const std::string&& GetWakeWordStream();

private:
    esp_afe_sr_data_t* afe_detection_data_ = nullptr;
    char* wakenet_model_ = NULL;
    std::vector<int16_t> input_buffer_;
    EventGroupHandle_t event_group_;
    std::function<void()> wake_word_detected_callback_;
    std::function<void(bool speaking)> vad_state_change_callback_;
    bool is_speaking_ = false;

    TaskHandle_t wake_word_encode_task_ = nullptr;
    StaticTask_t wake_word_encode_task_buffer_;
    StackType_t* wake_word_encode_task_stack_ = nullptr;
    std::list<std::vector<int16_t>> wake_word_pcm_;
    std::string wake_word_opus_;

    void StoreWakeWordData(uint16_t* data, size_t size);
    void AudioDetectionTask();
};

#endif

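Taken together, the new class exposes a push-style API: raw PCM is fed in from whatever capture loop the application runs, and results come back through callbacks. A hypothetical integration sketch follows; only the WakeWordDetect calls are taken from this commit, everything else (function names, wiring) is illustrative:

#include <cstdint>
#include <string>

#include "WakeWordDetect.h"

// Sketch of a possible caller, not the project's actual wiring.
void SetupWakeWord(WakeWordDetect& detect) {
    detect.OnVadStateChange([](bool speaking) {
        // e.g. light an LED while the user is speaking
    });
    detect.OnWakeWordDetected([&detect]() {
        // Wake word heard: detection is stopped internally, so encode the
        // buffered audio and fetch the Opus stream for later upload.
        detect.EncodeWakeWordData();
        std::string opus_stream = detect.GetWakeWordStream();  // blocks until encoding finishes
        // ... hand opus_stream to the uploader, then call detect.StartDetection() to resume
    });
    detect.StartDetection();
}

// Called from the audio capture path with 16-bit PCM samples.
void OnAudioInput(WakeWordDetect& detect, const int16_t* pcm, int samples) {
    if (detect.IsDetectionRunning()) {
        detect.Feed(pcm, samples);
    }
}
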
@ -1,10 +1,11 @@
## IDF Component Manager Manifest File
dependencies:
  78/esp-builtin-led: "^1.0.1"
  78/esp-builtin-led: "^1.0.2"
  78/esp-wifi-connect: "^1.1.0"
  78/esp-opus-encoder: "^1.0.1"
  78/esp-ml307: "^1.1.0"
  78/esp-opus-encoder: "^1.0.2"
  78/esp-ml307: "^1.1.1"
  espressif/esp-sr: "^1.9.0"
  espressif/button: "^3.3.1"
  lvgl/lvgl: "^8.4.0"
  esp_lvgl_port: "^1.4.0"
## Required IDF version

22 main/main.cc
@ -1,18 +1,15 @@
#include <cstdio>

#include "esp_log.h"
#include "esp_err.h"
#include "nvs.h"
#include "nvs_flash.h"
#include "driver/gpio.h"
#include "esp_event.h"
#include <esp_log.h>
#include <esp_err.h>
#include <nvs.h>
#include <nvs_flash.h>
#include <driver/gpio.h>
#include <esp_event.h>

#include "Application.h"
#include "SystemInfo.h"
#include "SystemReset.h"

#define TAG "main"
#define STATS_TICKS pdMS_TO_TICKS(1000)

extern "C" void app_main(void)
{

@ -37,8 +34,9 @@ extern "C" void app_main(void)
    // Dump CPU usage every 10 seconds
    while (true) {
        vTaskDelay(10000 / portTICK_PERIOD_MS);
        // SystemInfo::PrintRealTimeStats(STATS_TICKS);
        int free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL);
        ESP_LOGI(TAG, "Free heap size: %u minimal internal: %u", SystemInfo::GetFreeHeapSize(), free_sram);
        // SystemInfo::PrintRealTimeStats(pdMS_TO_TICKS(1000));
        int free_sram = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
        int min_free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL);
        ESP_LOGI(TAG, "Free internal: %u minimal internal: %u", free_sram, min_free_sram);
    }
}

7 partitions_4M.csv Normal file
@ -0,0 +1,7 @@
# ESP-IDF Partition Table
# Name,     Type,  SubType,  Offset,    Size,     Flags
nvs,        data,  nvs,      0x9000,    0x4000,
otadata,    data,  ota,      0xd000,    0x2000,
phy_init,   data,  phy,      0xf000,    0x1000,
model,      data,  spiffs,   0x10000,   0xF0000,
factory,    app,   factory,  0x100000,  3M,

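As a quick sanity check on the new 4 MB layout: the regions are contiguous and the factory slot ends exactly at the 4 MB boundary. A small compile-time sketch of that arithmetic (values copied from the table above, names invented for illustration):

#include <cstdint>

// Sketch: verifies that the partitions_4M.csv regions line up back to back.
constexpr uint32_t kNvsStart     = 0x9000,   kNvsSize     = 0x4000;
constexpr uint32_t kOtaDataStart = 0xD000,   kOtaDataSize = 0x2000;
constexpr uint32_t kPhyStart     = 0xF000,   kPhySize     = 0x1000;
constexpr uint32_t kModelStart   = 0x10000,  kModelSize   = 0xF0000;
constexpr uint32_t kFactoryStart = 0x100000, kFactorySize = 3 * 1024 * 1024;  // "3M"

static_assert(kNvsStart + kNvsSize == kOtaDataStart, "nvs runs up to otadata");
static_assert(kOtaDataStart + kOtaDataSize == kPhyStart, "otadata runs up to phy_init");
static_assert(kPhyStart + kPhySize == kModelStart, "phy_init runs up to model");
static_assert(kModelStart + kModelSize == kFactoryStart, "model runs up to factory");
static_assert(kFactoryStart + kFactorySize == 4 * 1024 * 1024, "factory ends at the 4 MB boundary");
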
@ -3,23 +3,11 @@ CONFIG_BOOTLOADER_LOG_LEVEL_NONE=y
CONFIG_BOOTLOADER_SKIP_VALIDATE_ALWAYS=y
CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE=y

CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y

CONFIG_SPIRAM=y
CONFIG_SPIRAM_MODE_OCT=y
CONFIG_SPIRAM_SPEED_80M=y
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=4096
CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768
CONFIG_SPIRAM_MEMTEST=n
CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y

CONFIG_HTTPD_MAX_REQ_HDR_LEN=2048
CONFIG_HTTPD_MAX_URI_LEN=2048

CONFIG_PARTITION_TABLE_CUSTOM=y
CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"
CONFIG_PARTITION_TABLE_FILENAME="partitions.csv"
CONFIG_PARTITION_TABLE_OFFSET=0x8000

CONFIG_USE_WAKENET=y

6 sdkconfig.defaults.esp32c3 Normal file
@ -0,0 +1,6 @@
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y

CONFIG_PARTITION_TABLE_CUSTOM=y
CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions_4M.csv"
CONFIG_PARTITION_TABLE_OFFSET=0x8000

@ -2,6 +2,17 @@
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y

CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y

CONFIG_SPIRAM=y
CONFIG_SPIRAM_MODE_OCT=y
CONFIG_SPIRAM_SPEED_80M=y
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=4096
CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768
CONFIG_SPIRAM_MEMTEST=n
CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y

CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
CONFIG_ESP32S3_DATA_CACHE_64KB=y
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y