From db9d837d296a03f83475a1ace1243914e0c37c86 Mon Sep 17 00:00:00 2001 From: kahrendt Date: Sun, 18 Feb 2024 00:50:24 -0500 Subject: [PATCH] Add more debugging logs to microWakeWord (#6238) --- .../components/micro_wake_word/__init__.py | 2 +- .../micro_wake_word/micro_wake_word.cpp | 44 +++++++++++++------ .../micro_wake_word/micro_wake_word.h | 3 ++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/esphome/components/micro_wake_word/__init__.py b/esphome/components/micro_wake_word/__init__.py index 2a84b7d74b..38202bdfb9 100644 --- a/esphome/components/micro_wake_word/__init__.py +++ b/esphome/components/micro_wake_word/__init__.py @@ -261,7 +261,7 @@ CONFIG_SCHEMA = cv.All( { cv.GenerateID(): cv.declare_id(MicroWakeWord), cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone), - cv.Optional(CONF_PROBABILITY_CUTOFF): cv.float_, + cv.Optional(CONF_PROBABILITY_CUTOFF): cv.percentage, cv.Optional(CONF_SLIDING_WINDOW_AVERAGE_SIZE): cv.positive_int, cv.Optional(CONF_ON_WAKE_WORD_DETECTED): automation.validate_automation( single=True diff --git a/esphome/components/micro_wake_word/micro_wake_word.cpp b/esphome/components/micro_wake_word/micro_wake_word.cpp index 8a443bc224..f0b3d55a9d 100644 --- a/esphome/components/micro_wake_word/micro_wake_word.cpp +++ b/esphome/components/micro_wake_word/micro_wake_word.cpp @@ -53,8 +53,15 @@ static const LogString *micro_wake_word_state_to_string(State state) { } } +void MicroWakeWord::dump_config() { + ESP_LOGCONFIG(TAG, "microWakeWord:"); + ESP_LOGCONFIG(TAG, " Wake Word: %s", this->get_wake_word().c_str()); + ESP_LOGCONFIG(TAG, " Probability cutoff: %.3f", this->probability_cutoff_); + ESP_LOGCONFIG(TAG, " Sliding window size: %d", this->sliding_window_average_size_); +} + void MicroWakeWord::setup() { - ESP_LOGCONFIG(TAG, "Setting up Micro Wake Word..."); + ESP_LOGCONFIG(TAG, "Setting up microWakeWord..."); if (!this->initialize_models()) { ESP_LOGE(TAG, "Failed to initialize models"); @@ -63,7 +70,7 @@ 
void MicroWakeWord::setup() { } ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); - this->input_buffer_ = allocator.allocate(NEW_SAMPLES_TO_GET); + this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE * sizeof(int16_t)); if (this->input_buffer_ == nullptr) { ESP_LOGW(TAG, "Could not allocate input buffer"); this->mark_failed(); @@ -81,7 +88,7 @@ void MicroWakeWord::setup() { } int MicroWakeWord::read_microphone_() { - size_t bytes_read = this->microphone_->read(this->input_buffer_, NEW_SAMPLES_TO_GET * sizeof(int16_t)); + size_t bytes_read = this->microphone_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t)); if (bytes_read == 0) { return 0; } @@ -279,11 +286,6 @@ bool MicroWakeWord::initialize_models() { } bool MicroWakeWord::update_features_() { - // Verify we have enough samples for a feature slice - if (!this->slice_available_()) { - return false; - } - // Retrieve strided audio samples int16_t *audio_samples = nullptr; if (!this->stride_audio_samples_(&audio_samples)) { @@ -369,20 +371,36 @@ void MicroWakeWord::set_sliding_window_average_size(size_t size) { bool MicroWakeWord::slice_available_() { size_t available = this->ring_buffer_->available(); + size_t free = this->ring_buffer_->free(); + + if (free < NEW_SAMPLES_TO_GET * sizeof(int16_t)) { + // If the ring buffer is within one audio slice of being full, then wake word detection will have issues. + // If this is constantly occurring, then some possibilities why are + // 1) there are too many other slow components configured + // 2) the ESP32 isn't fast enough; e.g., an ESP32 is much slower than an ESP32-S3 at inferences. + // 3) the model is too large + // 4) the model uses operations that are not optimized + ESP_LOGW(TAG, + "Audio buffer is nearly full. Wake word detection may be less accurate and have slower reponse times. " +#if !defined(USE_ESP32_VARIANT_ESP32S3) + "microWakeWord is designed for the ESP32-S3. The current platform is too slow for this model." 
+#endif + ); + } + return available > (NEW_SAMPLES_TO_GET * sizeof(int16_t)); } bool MicroWakeWord::stride_audio_samples_(int16_t **audio_samples) { + if (!this->slice_available_()) { + return false; + } + // Copy 320 bytes (160 samples over 10 ms) into preprocessor_audio_buffer_ from history in // preprocessor_stride_buffer_ memcpy((void *) (this->preprocessor_audio_buffer_), (void *) (this->preprocessor_stride_buffer_), HISTORY_SAMPLES_TO_KEEP * sizeof(int16_t)); - if (this->ring_buffer_->available() < NEW_SAMPLES_TO_GET * sizeof(int16_t)) { - ESP_LOGD(TAG, "Audio Buffer not full enough"); - return false; - } - // Copy 640 bytes (320 samples over 20 ms) from the ring buffer // The first 320 bytes (160 samples over 10 ms) will be from history size_t bytes_read = this->ring_buffer_->read((void *) (this->preprocessor_audio_buffer_ + HISTORY_SAMPLES_TO_KEEP), diff --git a/esphome/components/micro_wake_word/micro_wake_word.h b/esphome/components/micro_wake_word/micro_wake_word.h index 82f28b2ebb..27d05c3e09 100644 --- a/esphome/components/micro_wake_word/micro_wake_word.h +++ b/esphome/components/micro_wake_word/micro_wake_word.h @@ -66,6 +66,7 @@ class MicroWakeWord : public Component { void setup() override; void loop() override; float get_setup_priority() const override; + void dump_config() override; void start(); void stop(); @@ -74,6 +75,8 @@ class MicroWakeWord : public Component { bool initialize_models(); + std::string get_wake_word() { return this->wake_word_; } + // Increasing either of these will reduce the rate of false acceptances while increasing the false rejection rate void set_probability_cutoff(float probability_cutoff) { this->probability_cutoff_ = probability_cutoff; } void set_sliding_window_average_size(size_t size);