From ce8a77c7650453015c02be78103b0bab21114d34 Mon Sep 17 00:00:00 2001 From: Jesse Hills <3060199+jesserockz@users.noreply.github.com> Date: Mon, 8 May 2023 10:36:17 +1200 Subject: [PATCH] Speaker support (#4743) --- CODEOWNERS | 2 + esphome/components/api/api_connection.cpp | 2 +- esphome/components/i2s_audio/i2s_audio.h | 12 - .../i2s_audio/media_player/__init__.py | 3 +- .../i2s_audio/microphone/__init__.py | 3 +- .../components/i2s_audio/speaker/__init__.py | 87 ++++++++ .../i2s_audio/speaker/i2s_audio_speaker.cpp | 208 ++++++++++++++++++ .../i2s_audio/speaker/i2s_audio_speaker.h | 81 +++++++ esphome/components/speaker/__init__.py | 87 ++++++++ esphome/components/speaker/automation.h | 48 ++++ esphome/components/speaker/speaker.h | 27 +++ .../components/voice_assistant/__init__.py | 9 +- .../voice_assistant/voice_assistant.cpp | 38 ++++ .../voice_assistant/voice_assistant.h | 24 ++ esphome/const.py | 1 + esphome/core/defines.h | 2 + tests/test4.yaml | 7 + 17 files changed, 622 insertions(+), 19 deletions(-) create mode 100644 esphome/components/i2s_audio/speaker/__init__.py create mode 100644 esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp create mode 100644 esphome/components/i2s_audio/speaker/i2s_audio_speaker.h create mode 100644 esphome/components/speaker/__init__.py create mode 100644 esphome/components/speaker/automation.h create mode 100644 esphome/components/speaker/speaker.h diff --git a/CODEOWNERS b/CODEOWNERS index 3032e7dd88..d3216db695 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -115,6 +115,7 @@ esphome/components/i2c/* @esphome/core esphome/components/i2s_audio/* @jesserockz esphome/components/i2s_audio/media_player/* @jesserockz esphome/components/i2s_audio/microphone/* @jesserockz +esphome/components/i2s_audio/speaker/* @jesserockz esphome/components/ili9xxx/* @nielsnl68 esphome/components/improv_base/* @esphome/core esphome/components/improv_serial/* @esphome/core @@ -245,6 +246,7 @@ esphome/components/smt100/* @piechade esphome/components/sn74hc165/* @jesserockz esphome/components/socket/* @esphome/core esphome/components/sonoff_d1/* @anatoly-savchenkov +esphome/components/speaker/* @jesserockz esphome/components/spi/* @esphome/core esphome/components/sprinkler/* @kbx81 esphome/components/sps30/* @martgras diff --git a/esphome/components/api/api_connection.cpp b/esphome/components/api/api_connection.cpp index a79444a7e9..a08cb39a4a 100644 --- a/esphome/components/api/api_connection.cpp +++ b/esphome/components/api/api_connection.cpp @@ -996,7 +996,7 @@ DeviceInfoResponse APIConnection::device_info(const DeviceInfoRequest &msg) { : bluetooth_proxy::PASSIVE_ONLY_VERSION; #endif #ifdef USE_VOICE_ASSISTANT - resp.voice_assistant_version = 1; + resp.voice_assistant_version = voice_assistant::global_voice_assistant->get_version(); #endif return resp; } diff --git a/esphome/components/i2s_audio/i2s_audio.h b/esphome/components/i2s_audio/i2s_audio.h index 6b3fa10f3c..b2fb1ca27c 100644 --- a/esphome/components/i2s_audio/i2s_audio.h +++ b/esphome/components/i2s_audio/i2s_audio.h @@ -19,15 +19,6 @@ class I2SAudioComponent : public Component { public: void setup() override; - void register_audio_in(I2SAudioIn *in) { - this->audio_in_ = in; - in->set_parent(this); - } - void register_audio_out(I2SAudioOut *out) { - this->audio_out_ = out; - out->set_parent(this); - } - i2s_pin_config_t get_pin_config() const { return { .mck_io_num = I2S_PIN_NO_CHANGE, @@ -50,9 +41,6 @@ class I2SAudioComponent : public Component { protected: Mutex lock_; - I2SAudioIn *audio_in_{nullptr}; - I2SAudioOut *audio_out_{nullptr}; - uint8_t bclk_pin_; uint8_t lrclk_pin_; i2s_port_t port_{}; diff --git a/esphome/components/i2s_audio/media_player/__init__.py b/esphome/components/i2s_audio/media_player/__init__.py index 4ccb9cfc0a..57c3b1b5b6 100644 --- a/esphome/components/i2s_audio/media_player/__init__.py +++ b/esphome/components/i2s_audio/media_player/__init__.py @@ -84,8 +84,7 @@ async def to_code(config): await cg.register_component(var, config) await media_player.register_media_player(var, config) - parent = await cg.get_variable(config[CONF_I2S_AUDIO_ID]) - cg.add(parent.register_audio_out(var)) + await cg.register_parented(var, config[CONF_I2S_AUDIO_ID]) if config[CONF_DAC_TYPE] == "internal": cg.add(var.set_internal_dac_mode(config[CONF_MODE])) diff --git a/esphome/components/i2s_audio/microphone/__init__.py b/esphome/components/i2s_audio/microphone/__init__.py index 5abc13a044..d2c73cf0d0 100644 --- a/esphome/components/i2s_audio/microphone/__init__.py +++ b/esphome/components/i2s_audio/microphone/__init__.py @@ -33,8 +33,7 @@ async def to_code(config): var = cg.new_Pvariable(config[CONF_ID]) await cg.register_component(var, config) - parent = await cg.get_variable(config[CONF_I2S_AUDIO_ID]) - cg.add(parent.register_audio_in(var)) + await cg.register_parented(var, config[CONF_I2S_AUDIO_ID]) cg.add(var.set_din_pin(config[CONF_I2S_DIN_PIN])) diff --git a/esphome/components/i2s_audio/speaker/__init__.py b/esphome/components/i2s_audio/speaker/__init__.py new file mode 100644 index 0000000000..72455af1b7 --- /dev/null +++ b/esphome/components/i2s_audio/speaker/__init__.py @@ -0,0 +1,87 @@ +import esphome.codegen as cg +import esphome.config_validation as cv +from esphome import pins +from esphome.const import CONF_ID, CONF_MODE +from esphome.components import esp32, speaker + +from .. import ( + CONF_I2S_AUDIO_ID, + CONF_I2S_DOUT_PIN, + I2SAudioComponent, + I2SAudioOut, + i2s_audio_ns, +) + +CODEOWNERS = ["@jesserockz"] +DEPENDENCIES = ["i2s_audio"] + +I2SAudioSpeaker = i2s_audio_ns.class_( + "I2SAudioSpeaker", cg.Component, speaker.Speaker, I2SAudioOut +) + +i2s_dac_mode_t = cg.global_ns.enum("i2s_dac_mode_t") + +CONF_MUTE_PIN = "mute_pin" +CONF_DAC_TYPE = "dac_type" + +INTERNAL_DAC_OPTIONS = { + "left": i2s_dac_mode_t.I2S_DAC_CHANNEL_LEFT_EN, + "right": i2s_dac_mode_t.I2S_DAC_CHANNEL_RIGHT_EN, + "stereo": i2s_dac_mode_t.I2S_DAC_CHANNEL_BOTH_EN, +} + +EXTERNAL_DAC_OPTIONS = ["mono", "stereo"] + +NO_INTERNAL_DAC_VARIANTS = [esp32.const.VARIANT_ESP32S2] + + +def validate_esp32_variant(config): + if config[CONF_DAC_TYPE] != "internal": + return config + variant = esp32.get_esp32_variant() + if variant in NO_INTERNAL_DAC_VARIANTS: + raise cv.Invalid(f"{variant} does not have an internal DAC") + return config + + +CONFIG_SCHEMA = cv.All( + cv.typed_schema( + { + "internal": speaker.SPEAKER_SCHEMA.extend( + { + cv.GenerateID(): cv.declare_id(I2SAudioSpeaker), + cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent), + cv.Required(CONF_MODE): cv.enum(INTERNAL_DAC_OPTIONS, lower=True), + } + ).extend(cv.COMPONENT_SCHEMA), + "external": speaker.SPEAKER_SCHEMA.extend( + { + cv.GenerateID(): cv.declare_id(I2SAudioSpeaker), + cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent), + cv.Required( + CONF_I2S_DOUT_PIN + ): pins.internal_gpio_output_pin_number, + cv.Optional(CONF_MODE, default="mono"): cv.one_of( + *EXTERNAL_DAC_OPTIONS, lower=True + ), + } + ).extend(cv.COMPONENT_SCHEMA), + }, + key=CONF_DAC_TYPE, + ), + validate_esp32_variant, +) + + +async def to_code(config): + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) + await speaker.register_speaker(var, config) + + await cg.register_parented(var, config[CONF_I2S_AUDIO_ID]) + + if config[CONF_DAC_TYPE] == "internal": + cg.add(var.set_internal_dac_mode(config[CONF_MODE])) + else: + cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN])) + cg.add(var.set_external_dac_channels(2 if config[CONF_MODE] == "stereo" else 1)) diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp new file mode 100644 index 0000000000..fa41a70277 --- /dev/null +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp @@ -0,0 +1,208 @@ +#include "i2s_audio_speaker.h" + +#ifdef USE_ESP32 + +#include + +#include "esphome/core/application.h" +#include "esphome/core/hal.h" +#include "esphome/core/log.h" + +namespace esphome { +namespace i2s_audio { + +static const size_t BUFFER_COUNT = 10; + +static const char *const TAG = "i2s_audio.speaker"; + +void I2SAudioSpeaker::setup() { + ESP_LOGCONFIG(TAG, "Setting up I2S Audio Speaker..."); + + this->buffer_queue_ = xQueueCreate(BUFFER_COUNT, sizeof(DataEvent)); + this->event_queue_ = xQueueCreate(20, sizeof(TaskEvent)); +} + +void I2SAudioSpeaker::start() { this->state_ = speaker::STATE_STARTING; } +void I2SAudioSpeaker::start_() { + if (!this->parent_->try_lock()) { + return; // Waiting for another i2s component to return lock + } + this->state_ = speaker::STATE_RUNNING; + + xTaskCreate(I2SAudioSpeaker::player_task, "speaker_task", 8192, (void *) this, 0, &this->player_task_handle_); +} + +void I2SAudioSpeaker::player_task(void *params) { + I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params; + + TaskEvent event; + event.type = TaskEventType::STARTING; + xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + + i2s_driver_config_t config = { + .mode = (i2s_mode_t) (I2S_MODE_MASTER | I2S_MODE_TX), + .sample_rate = 16000, + .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, + .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT, + .communication_format = I2S_COMM_FORMAT_STAND_I2S, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = 8, + .dma_buf_len = 1024, + .use_apll = false, + .tx_desc_auto_clear = true, + .fixed_mclk = I2S_PIN_NO_CHANGE, + .mclk_multiple = I2S_MCLK_MULTIPLE_DEFAULT, + .bits_per_chan = I2S_BITS_PER_CHAN_DEFAULT, + }; +#if SOC_I2S_SUPPORTS_DAC + if (this_speaker->internal_dac_mode_ != I2S_DAC_CHANNEL_DISABLE) { + config.mode = (i2s_mode_t) (config.mode | I2S_MODE_DAC_BUILT_IN); + } +#endif + + i2s_driver_install(this_speaker->parent_->get_port(), &config, 0, nullptr); + +#if SOC_I2S_SUPPORTS_DAC + if (this_speaker->internal_dac_mode_ == I2S_DAC_CHANNEL_DISABLE) { +#endif + i2s_pin_config_t pin_config = this_speaker->parent_->get_pin_config(); + pin_config.data_out_num = this_speaker->dout_pin_; + + i2s_set_pin(this_speaker->parent_->get_port(), &pin_config); +#if SOC_I2S_SUPPORTS_DAC + } else { + i2s_set_dac_mode(this_speaker->internal_dac_mode_); + } +#endif + + DataEvent data_event; + + event.type = TaskEventType::STARTED; + xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + + int16_t buffer[BUFFER_SIZE / 2]; + + while (true) { + if (xQueueReceive(this_speaker->buffer_queue_, &data_event, 100 / portTICK_PERIOD_MS) != pdTRUE) { + break; // End of audio from main thread + } + if (data_event.stop) { + // Stop signal from main thread + while (xQueueReceive(this_speaker->buffer_queue_, &data_event, 0) == pdTRUE) { + // Flush queue + } + break; + } + size_t bytes_written; + + memmove(buffer, data_event.data, data_event.len); + size_t remaining = data_event.len / 2; + size_t current = 0; + + while (remaining > 0) { + uint32_t sample = (buffer[current] << 16) | (buffer[current] & 0xFFFF); + + esp_err_t err = i2s_write(this_speaker->parent_->get_port(), &sample, sizeof(sample), &bytes_written, + (100 / portTICK_PERIOD_MS)); + if (err != ESP_OK) { + event = {.type = TaskEventType::WARNING, .err = err}; + xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + continue; + } + remaining--; + current++; + } + + event.type = TaskEventType::PLAYING; + xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + } + + i2s_zero_dma_buffer(this_speaker->parent_->get_port()); + + event.type = TaskEventType::STOPPING; + xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + + i2s_stop(this_speaker->parent_->get_port()); + i2s_driver_uninstall(this_speaker->parent_->get_port()); + + event.type = TaskEventType::STOPPED; + xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + + while (true) { + delay(10); + } +} + +void I2SAudioSpeaker::stop() { + if (this->state_ == speaker::STATE_STOPPED) + return; + this->state_ = speaker::STATE_STOPPING; + DataEvent data; + data.stop = true; + xQueueSendToFront(this->buffer_queue_, &data, portMAX_DELAY); +} + +void I2SAudioSpeaker::watch_() { + TaskEvent event; + if (xQueueReceive(this->event_queue_, &event, 0) == pdTRUE) { + switch (event.type) { + case TaskEventType::STARTING: + case TaskEventType::STARTED: + case TaskEventType::STOPPING: + break; + case TaskEventType::PLAYING: + this->status_clear_warning(); + break; + case TaskEventType::STOPPED: + this->parent_->unlock(); + this->state_ = speaker::STATE_STOPPED; + vTaskDelete(this->player_task_handle_); + this->player_task_handle_ = nullptr; + break; + case TaskEventType::WARNING: + ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(event.err)); + this->status_set_warning(); + break; + } + } +} + +void I2SAudioSpeaker::loop() { + switch (this->state_) { + case speaker::STATE_STARTING: + this->start_(); + break; + case speaker::STATE_RUNNING: + this->watch_(); + break; + case speaker::STATE_STOPPING: + case speaker::STATE_STOPPED: + break; + } +} + +bool I2SAudioSpeaker::play(const uint8_t *data, size_t length) { + if (this->state_ != speaker::STATE_RUNNING && this->state_ != speaker::STATE_STARTING) { + this->start(); + } + size_t remaining = length; + size_t index = 0; + while (remaining > 0) { + DataEvent event; + event.stop = false; + size_t to_send_length = std::min(remaining, BUFFER_SIZE); + event.len = to_send_length; + memcpy(event.data, data + index, to_send_length); + if (xQueueSend(this->buffer_queue_, &event, 100 / portTICK_PERIOD_MS) == pdTRUE) { + remaining -= to_send_length; + index += to_send_length; + } + App.feed_wdt(); + } + return true; +} + +} // namespace i2s_audio +} // namespace esphome + +#endif // USE_ESP32 diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h new file mode 100644 index 0000000000..4f1d2172d7 --- /dev/null +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h @@ -0,0 +1,81 @@ +#pragma once + +#ifdef USE_ESP32 + +#include "../i2s_audio.h" + +#include +#include +#include + +#include "esphome/components/speaker/speaker.h" +#include "esphome/core/component.h" +#include "esphome/core/gpio.h" +#include "esphome/core/helpers.h" + +namespace esphome { +namespace i2s_audio { + +static const size_t BUFFER_SIZE = 1024; + +enum class TaskEventType : uint8_t { + STARTING = 0, + STARTED, + PLAYING, + STOPPING, + STOPPED, + WARNING = 255, +}; + +struct TaskEvent { + TaskEventType type; + esp_err_t err; +}; + +struct DataEvent { + bool stop; + size_t len; + uint8_t data[BUFFER_SIZE]; +}; + +class I2SAudioSpeaker : public Component, public speaker::Speaker, public I2SAudioOut { + public: + float get_setup_priority() const override { return esphome::setup_priority::LATE; } + + void setup() override; + void loop() override; + + void set_dout_pin(uint8_t pin) { this->dout_pin_ = pin; } +#if SOC_I2S_SUPPORTS_DAC + void set_internal_dac_mode(i2s_dac_mode_t mode) { this->internal_dac_mode_ = mode; } +#endif + void set_external_dac_channels(uint8_t channels) { this->external_dac_channels_ = channels; } + + void start(); + void stop() override; + + bool play(const uint8_t *data, size_t length) override; + + protected: + void start_(); + // void stop_(); + void watch_(); + + static void player_task(void *params); + + TaskHandle_t player_task_handle_{nullptr}; + QueueHandle_t buffer_queue_; + QueueHandle_t event_queue_; + + uint8_t dout_pin_{0}; + +#if SOC_I2S_SUPPORTS_DAC + i2s_dac_mode_t internal_dac_mode_{I2S_DAC_CHANNEL_DISABLE}; +#endif + uint8_t external_dac_channels_; +}; + +} // namespace i2s_audio +} // namespace esphome + +#endif // USE_ESP32 diff --git a/esphome/components/speaker/__init__.py b/esphome/components/speaker/__init__.py new file mode 100644 index 0000000000..79d5df8c5a --- /dev/null +++ b/esphome/components/speaker/__init__.py @@ -0,0 +1,87 @@ +from esphome import automation +import esphome.config_validation as cv +import esphome.codegen as cg + +from esphome.automation import maybe_simple_id +from esphome.const import CONF_ID, CONF_DATA +from esphome.core import CORE +from esphome.coroutine import coroutine_with_priority + + +CODEOWNERS = ["@jesserockz"] + +IS_PLATFORM_COMPONENT = True + +speaker_ns = cg.esphome_ns.namespace("speaker") + +Speaker = speaker_ns.class_("Speaker") + +PlayAction = speaker_ns.class_( + "PlayAction", automation.Action, cg.Parented.template(Speaker) +) +StopAction = speaker_ns.class_( + "StopAction", automation.Action, cg.Parented.template(Speaker) +) + +IsPlayingCondition = speaker_ns.class_("IsPlayingCondition", automation.Condition) + + +async def setup_speaker_core_(var, config): + pass + + +async def register_speaker(var, config): + if not CORE.has_id(config[CONF_ID]): + var = cg.Pvariable(config[CONF_ID], var) + await setup_speaker_core_(var, config) + + +SPEAKER_SCHEMA = cv.Schema({}) + + +SPEAKER_AUTOMATION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Speaker)}) + + +async def speaker_action(config, action_id, template_arg, args): + var = cg.new_Pvariable(action_id, template_arg) + await cg.register_parented(var, config[CONF_ID]) + return var + + +@automation.register_action( + "speaker.play", + PlayAction, + cv.maybe_simple_value( + { + cv.GenerateID(): cv.use_id(Speaker), + cv.Required(CONF_DATA): cv.templatable(cv.ensure_list(cv.hex_uint8_t)), + }, + key=CONF_DATA, + ), +) +async def speaker_play_action(config, action_id, template_arg, args): + var = cg.new_Pvariable(action_id, template_arg) + await cg.register_parented(var, config[CONF_ID]) + data = config[CONF_DATA] + + if cg.is_template(data): + templ = await cg.templatable(data, args, cg.std_vector.template(cg.uint8)) + cg.add(var.set_data_template(templ)) + else: + cg.add(var.set_data_static(data)) + return var + + +automation.register_action("speaker.stop", StopAction, SPEAKER_AUTOMATION_SCHEMA)( + speaker_action +) + +automation.register_condition( + "speaker.is_playing", IsPlayingCondition, SPEAKER_AUTOMATION_SCHEMA +)(speaker_action) + + +@coroutine_with_priority(100.0) +async def to_code(config): + cg.add_global(speaker_ns.using) + cg.add_define("USE_SPEAKER") diff --git a/esphome/components/speaker/automation.h b/esphome/components/speaker/automation.h new file mode 100644 index 0000000000..e28991a0d1 --- /dev/null +++ b/esphome/components/speaker/automation.h @@ -0,0 +1,48 @@ +#pragma once + +#include "esphome/core/automation.h" +#include "speaker.h" + +#include + +namespace esphome { +namespace speaker { + +template class PlayAction : public Action, public Parented { + public: + void set_data_template(std::function(Ts...)> func) { + this->data_func_ = func; + this->static_ = false; + } + void set_data_static(const std::vector &data) { + this->data_static_ = data; + this->static_ = true; + } + + void play(Ts... x) override { + if (this->static_) { + this->parent_->play(this->data_static_); + } else { + auto val = this->data_func_(x...); + this->parent_->play(val); + } + } + + protected: + bool static_{false}; + std::function(Ts...)> data_func_{}; + std::vector data_static_{}; +}; + +template class StopAction : public Action, public Parented { + public: + void play(Ts... x) override { this->parent_->stop(); } +}; + +template class IsPlayingCondition : public Condition, public Parented { + public: + bool check(Ts... x) override { return this->parent_->is_running(); } +}; + +} // namespace speaker +} // namespace esphome diff --git a/esphome/components/speaker/speaker.h b/esphome/components/speaker/speaker.h new file mode 100644 index 0000000000..5dfabfa40e --- /dev/null +++ b/esphome/components/speaker/speaker.h @@ -0,0 +1,27 @@ +#pragma once + +namespace esphome { +namespace speaker { + +enum State : uint8_t { + STATE_STOPPED = 0, + STATE_STARTING, + STATE_RUNNING, + STATE_STOPPING, +}; + +class Speaker { + public: + virtual bool play(const uint8_t *data, size_t length) = 0; + virtual bool play(const std::vector &data) { return this->play(data.data(), data.size()); } + + virtual void stop() = 0; + + bool is_running() const { return this->state_ == STATE_RUNNING; } + + protected: + State state_{STATE_STOPPED}; +}; + +} // namespace speaker +} // namespace esphome diff --git a/esphome/components/voice_assistant/__init__.py b/esphome/components/voice_assistant/__init__.py index 20698a1b82..624fcdf52c 100644 --- a/esphome/components/voice_assistant/__init__.py +++ b/esphome/components/voice_assistant/__init__.py @@ -1,10 +1,10 @@ import esphome.config_validation as cv import esphome.codegen as cg -from esphome.const import CONF_ID, CONF_MICROPHONE +from esphome.const import CONF_ID, CONF_MICROPHONE, CONF_SPEAKER from esphome import automation from esphome.automation import register_action -from esphome.components import microphone +from esphome.components import microphone, speaker AUTO_LOAD = ["socket"] DEPENDENCIES = ["api", "microphone"] @@ -34,6 +34,7 @@ CONFIG_SCHEMA = cv.Schema( { cv.GenerateID(): cv.declare_id(VoiceAssistant), cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone), + cv.Optional(CONF_SPEAKER): cv.use_id(speaker.Speaker), cv.Optional(CONF_ON_START): automation.validate_automation(single=True), cv.Optional(CONF_ON_STT_END): automation.validate_automation(single=True), cv.Optional(CONF_ON_TTS_START): automation.validate_automation(single=True), @@ -51,6 +52,10 @@ async def to_code(config): mic = await cg.get_variable(config[CONF_MICROPHONE]) cg.add(var.set_microphone(mic)) + if CONF_SPEAKER in config: + spkr = await cg.get_variable(config[CONF_SPEAKER]) + cg.add(var.set_speaker(spkr)) + if CONF_ON_START in config: await automation.build_automation( var.get_start_trigger(), [], config[CONF_ON_START] diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index e2d5bea90a..4001779edd 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -2,6 +2,8 @@ #include "esphome/core/log.h" +#include + namespace esphome { namespace voice_assistant { @@ -33,6 +35,27 @@ void VoiceAssistant::setup() { return; } +#ifdef USE_SPEAKER + if (this->speaker_ != nullptr) { + struct sockaddr_storage server; + + socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), 6055); + if (sl == 0) { + ESP_LOGW(TAG, "Socket unable to set sockaddr: errno %d", errno); + this->mark_failed(); + return; + } + server.ss_family = AF_INET; + + err = socket_->bind((struct sockaddr *) &server, sizeof(server)); + if (err != 0) { + ESP_LOGW(TAG, "Socket unable to bind: errno %d", errno); + this->mark_failed(); + return; + } + } +#endif + this->mic_->add_data_callback([this](const std::vector &data) { if (!this->running_) { return; @@ -41,6 +64,21 @@ void VoiceAssistant::setup() { }); } +void VoiceAssistant::loop() { +#ifdef USE_SPEAKER + if (this->speaker_ == nullptr) { + return; + } + + uint8_t buf[1024]; + auto len = this->socket_->read(buf, sizeof(buf)); + if (len == -1) { + return; + } + this->speaker_->play(buf, len); +#endif +} + void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) { ESP_LOGD(TAG, "Starting..."); diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index 813c006e98..bcfdb75a75 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -2,23 +2,44 @@ #include "esphome/core/automation.h" #include "esphome/core/component.h" +#include "esphome/core/defines.h" #include "esphome/core/helpers.h" #include "esphome/components/api/api_pb2.h" #include "esphome/components/api/api_server.h" #include "esphome/components/microphone/microphone.h" +#ifdef USE_SPEAKER +#include "esphome/components/speaker/speaker.h" +#endif #include "esphome/components/socket/socket.h" namespace esphome { namespace voice_assistant { +// Version 1: Initial version +// Version 2: Adds raw speaker support +static const uint32_t INITIAL_VERSION = 1; +static const uint32_t SPEAKER_SUPPORT = 2; + class VoiceAssistant : public Component { public: void setup() override; + void loop() override; float get_setup_priority() const override; void start(struct sockaddr_storage *addr, uint16_t port); void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; } +#ifdef USE_SPEAKER + void set_speaker(speaker::Speaker *speaker) { this->speaker_ = speaker; } +#endif + + uint32_t get_version() const { +#ifdef USE_SPEAKER + if (this->speaker_ != nullptr) + return SPEAKER_SUPPORT; +#endif + return INITIAL_VERSION; + } void request_start(); void signal_stop(); @@ -44,6 +65,9 @@ class VoiceAssistant : public Component { Trigger *error_trigger_ = new Trigger(); microphone::Microphone *mic_{nullptr}; +#ifdef USE_SPEAKER + speaker::Speaker *speaker_{nullptr}; +#endif bool running_{false}; }; diff --git a/esphome/const.py b/esphome/const.py index 2f66b47b8e..79f7f6cf68 100644 --- a/esphome/const.py +++ b/esphome/const.py @@ -659,6 +659,7 @@ CONF_SLEEP_WHEN_DONE = "sleep_when_done" CONF_SONY = "sony" CONF_SOURCE = "source" CONF_SOURCE_ID = "source_id" +CONF_SPEAKER = "speaker" CONF_SPEED = "speed" CONF_SPEED_COMMAND_TOPIC = "speed_command_topic" CONF_SPEED_COUNT = "speed_count" diff --git a/esphome/core/defines.h b/esphome/core/defines.h index 77d41e5b58..ef08eecbe5 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -73,6 +73,8 @@ #define USE_WIFI_11KV_SUPPORT #define USE_BLUETOOTH_PROXY #define USE_VOICE_ASSISTANT +#define USE_MICROPHONE +#define USE_SPEAKER #ifdef USE_ARDUINO #define USE_ARDUINO_VERSION_CODE VERSION_CODE(2, 0, 5) diff --git a/tests/test4.yaml b/tests/test4.yaml index 04d6f4678e..d253db8f70 100644 --- a/tests/test4.yaml +++ b/tests/test4.yaml @@ -703,6 +703,13 @@ microphone: id: mic_id i2s_din_pin: GPIO23 +speaker: + - platform: i2s_audio + id: speaker_id + dac_type: external + i2s_dout_pin: GPIO25 + mode: mono + voice_assistant: microphone: mic_id