From b60c08dd2895b623a9a525cc540140ae84cdf4ff Mon Sep 17 00:00:00 2001 From: Jesse Hills <3060199+jesserockz@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:45:10 +1200 Subject: [PATCH] Add push to talk voice assistant (#4648) * Add push to talk voice assistant * Refactor most code into voice_assistant * Make voice_assistant the component and remove push_to_talk (can be done in yaml) * Fix component setup * Always AF_INET to match serverside * Fix microphone and media player co-existence * Format * Update codeowners * Update test file * Fix endifs * nullptr not NULL * clang-tidy * Format * fixup: Add VA event data * Generate proto * Parse and log events * Add default to switch * Fix * Add mic/va to test5 --- CODEOWNERS | 4 + esphome/components/api/api.proto | 55 ++++++ esphome/components/api/api_connection.cpp | 30 +++ esphome/components/api/api_connection.h | 13 ++ esphome/components/api/api_frame_helper.h | 13 +- esphome/components/api/api_pb2.cpp | 185 ++++++++++++++++++ esphome/components/api/api_pb2.h | 71 +++++++ esphome/components/api/api_pb2_service.cpp | 60 ++++++ esphome/components/api/api_pb2_service.h | 18 ++ esphome/components/api/api_server.cpp | 13 ++ esphome/components/api/api_server.h | 5 + esphome/components/i2s_audio/__init__.py | 70 +++++++ esphome/components/i2s_audio/i2s_audio.cpp | 30 +++ esphome/components/i2s_audio/i2s_audio.h | 64 ++++++ .../__init__.py} | 53 +++-- .../i2s_audio_media_player.cpp | 95 ++++++--- .../i2s_audio_media_player.h | 29 ++- .../i2s_audio/microphone/__init__.py | 41 ++++ .../microphone/i2s_audio_microphone.cpp | 101 ++++++++++ .../microphone/i2s_audio_microphone.h | 37 ++++ .../components/improv_base/improv_base.cpp | 2 +- esphome/components/microphone/__init__.py | 91 +++++++++ esphome/components/microphone/automation.h | 32 +++ esphome/components/microphone/microphone.h | 33 ++++ .../components/socket/bsd_sockets_impl.cpp | 5 + .../components/socket/lwip_raw_tcp_impl.cpp | 4 + 
esphome/components/socket/socket.cpp | 20 +- esphome/components/socket/socket.h | 8 +- .../components/voice_assistant/__init__.py | 57 ++++++ .../voice_assistant/voice_assistant.cpp | 148 ++++++++++++++ .../voice_assistant/voice_assistant.h | 50 +++++ esphome/const.py | 1 + esphome/core/defines.h | 1 + esphome/core/helpers.h | 3 + tests/test4.yaml | 17 +- 35 files changed, 1384 insertions(+), 75 deletions(-) create mode 100644 esphome/components/i2s_audio/i2s_audio.cpp create mode 100644 esphome/components/i2s_audio/i2s_audio.h rename esphome/components/i2s_audio/{media_player.py => media_player/__init__.py} (68%) rename esphome/components/i2s_audio/{ => media_player}/i2s_audio_media_player.cpp (72%) rename esphome/components/i2s_audio/{ => media_player}/i2s_audio_media_player.h (81%) create mode 100644 esphome/components/i2s_audio/microphone/__init__.py create mode 100644 esphome/components/i2s_audio/microphone/i2s_audio_microphone.cpp create mode 100644 esphome/components/i2s_audio/microphone/i2s_audio_microphone.h create mode 100644 esphome/components/microphone/__init__.py create mode 100644 esphome/components/microphone/automation.h create mode 100644 esphome/components/microphone/microphone.h create mode 100644 esphome/components/voice_assistant/__init__.py create mode 100644 esphome/components/voice_assistant/voice_assistant.cpp create mode 100644 esphome/components/voice_assistant/voice_assistant.h diff --git a/CODEOWNERS b/CODEOWNERS index 76156db6e6..8e606d253a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -111,6 +111,8 @@ esphome/components/hte501/* @Stock-M esphome/components/hydreon_rgxx/* @functionpointer esphome/components/i2c/* @esphome/core esphome/components/i2s_audio/* @jesserockz +esphome/components/i2s_audio/media_player/* @jesserockz +esphome/components/i2s_audio/microphone/* @jesserockz esphome/components/ili9xxx/* @nielsnl68 esphome/components/improv_base/* @esphome/core esphome/components/improv_serial/* @esphome/core @@ -154,6 +156,7 @@ 
esphome/components/mcp9808/* @k7hpn esphome/components/md5/* @esphome/core esphome/components/mdns/* @esphome/core esphome/components/media_player/* @jesserockz +esphome/components/microphone/* @jesserockz esphome/components/mics_4514/* @jesserockz esphome/components/midea/* @dudanov esphome/components/midea_ir/* @dudanov @@ -287,6 +290,7 @@ esphome/components/ufire_ise/* @pvizeli esphome/components/ultrasonic/* @OttoWinter esphome/components/vbus/* @ssieb esphome/components/version/* @esphome/core +esphome/components/voice_assistant/* @jesserockz esphome/components/wake_on_lan/* @willwill2will54 esphome/components/web_server_base/* @OttoWinter esphome/components/whirlpool/* @glmnet diff --git a/esphome/components/api/api.proto b/esphome/components/api/api.proto index e36f0581ca..f31ef3ffc0 100644 --- a/esphome/components/api/api.proto +++ b/esphome/components/api/api.proto @@ -55,6 +55,7 @@ service APIConnection { rpc subscribe_bluetooth_connections_free(SubscribeBluetoothConnectionsFreeRequest) returns (BluetoothConnectionsFreeResponse) {} rpc unsubscribe_bluetooth_le_advertisements(UnsubscribeBluetoothLEAdvertisementsRequest) returns (void) {} + rpc subscribe_voice_assistant(SubscribeVoiceAssistantRequest) returns (void) {} } @@ -210,6 +211,8 @@ message DeviceInfoResponse { string manufacturer = 12; string friendly_name = 13; + + uint32 voice_assistant_version = 14; } message ListEntitiesRequest { @@ -1379,3 +1382,55 @@ message BluetoothDeviceClearCacheResponse { bool success = 2; int32 error = 3; } + +// ==================== PUSH TO TALK ==================== +message SubscribeVoiceAssistantRequest { + option (id) = 89; + option (source) = SOURCE_CLIENT; + option (ifdef) = "USE_VOICE_ASSISTANT"; + + bool subscribe = 1; +} + +message VoiceAssistantRequest { + option (id) = 90; + option (source) = SOURCE_SERVER; + option (ifdef) = "USE_VOICE_ASSISTANT"; + + bool start = 1; +} + +message VoiceAssistantResponse { + option (id) = 91; + option (source) = 
SOURCE_CLIENT; + option (ifdef) = "USE_VOICE_ASSISTANT"; + + uint32 port = 1; + bool error = 2; +} + +enum VoiceAssistantEvent { + VOICE_ASSISTANT_ERROR = 0; + VOICE_ASSISTANT_RUN_START = 1; + VOICE_ASSISTANT_RUN_END = 2; + VOICE_ASSISTANT_STT_START = 3; + VOICE_ASSISTANT_STT_END = 4; + VOICE_ASSISTANT_INTENT_START = 5; + VOICE_ASSISTANT_INTENT_END = 6; + VOICE_ASSISTANT_TTS_START = 7; + VOICE_ASSISTANT_TTS_END = 8; +} + +message VoiceAssistantEventData { + string name = 1; + string value = 2; +} + +message VoiceAssistantEventResponse { + option (id) = 92; + option (source) = SOURCE_CLIENT; + option (ifdef) = "USE_VOICE_ASSISTANT"; + + VoiceAssistantEvent event_type = 1; + repeated VoiceAssistantEventData data = 2; +} diff --git a/esphome/components/api/api_connection.cpp b/esphome/components/api/api_connection.cpp index 104560771e..96fb3ea9fa 100644 --- a/esphome/components/api/api_connection.cpp +++ b/esphome/components/api/api_connection.cpp @@ -16,6 +16,9 @@ #ifdef USE_BLUETOOTH_PROXY #include "esphome/components/bluetooth_proxy/bluetooth_proxy.h" #endif +#ifdef USE_VOICE_ASSISTANT +#include "esphome/components/voice_assistant/voice_assistant.h" +#endif namespace esphome { namespace api { @@ -893,6 +896,30 @@ BluetoothConnectionsFreeResponse APIConnection::subscribe_bluetooth_connections_ } #endif +#ifdef USE_VOICE_ASSISTANT +bool APIConnection::request_voice_assistant(bool start) { + if (!this->voice_assistant_subscription_) + return false; + VoiceAssistantRequest msg; + msg.start = start; + return this->send_voice_assistant_request(msg); +} +void APIConnection::on_voice_assistant_response(const VoiceAssistantResponse &msg) { + if (voice_assistant::global_voice_assistant != nullptr) { + struct sockaddr_storage storage; + socklen_t len = sizeof(storage); + this->helper_->getpeername((struct sockaddr *) &storage, &len); + voice_assistant::global_voice_assistant->start(&storage, msg.port); + } +}; +void APIConnection::on_voice_assistant_event_response(const 
VoiceAssistantEventResponse &msg) { + if (voice_assistant::global_voice_assistant != nullptr) { + voice_assistant::global_voice_assistant->on_event(msg); + } +} + +#endif + bool APIConnection::send_log_message(int level, const char *tag, const char *line) { if (this->log_subscription_ < level) return false; @@ -970,6 +997,9 @@ DeviceInfoResponse APIConnection::device_info(const DeviceInfoRequest &msg) { resp.bluetooth_proxy_version = bluetooth_proxy::global_bluetooth_proxy->has_active() ? bluetooth_proxy::ACTIVE_CONNECTIONS_VERSION : bluetooth_proxy::PASSIVE_ONLY_VERSION; +#endif +#ifdef USE_VOICE_ASSISTANT + resp.voice_assistant_version = 1; #endif return resp; } diff --git a/esphome/components/api/api_connection.h b/esphome/components/api/api_connection.h index c85c69a2b9..78ecbb98e6 100644 --- a/esphome/components/api/api_connection.h +++ b/esphome/components/api/api_connection.h @@ -6,6 +6,7 @@ #include "api_server.h" #include "esphome/core/application.h" #include "esphome/core/component.h" +#include "esphome/core/defines.h" #include @@ -123,6 +124,15 @@ class APIConnection : public APIServerConnection { } #endif +#ifdef USE_VOICE_ASSISTANT + void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) override { + this->voice_assistant_subscription_ = msg.subscribe; + } + bool request_voice_assistant(bool start); + void on_voice_assistant_response(const VoiceAssistantResponse &msg) override; + void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override; +#endif + void on_disconnect_response(const DisconnectResponse &value) override; void on_ping_response(const PingResponse &value) override { // we initiated ping @@ -203,6 +213,9 @@ class APIConnection : public APIServerConnection { bool service_call_subscription_{false}; #ifdef USE_BLUETOOTH_PROXY bool bluetooth_le_advertisement_subscription_{false}; +#endif +#ifdef USE_VOICE_ASSISTANT + bool voice_assistant_subscription_{false}; #endif bool next_close_ = false; 
APIServer *parent_; diff --git a/esphome/components/api/api_frame_helper.h b/esphome/components/api/api_frame_helper.h index 348a9b574f..bf4872d2d6 100644 --- a/esphome/components/api/api_frame_helper.h +++ b/esphome/components/api/api_frame_helper.h @@ -10,8 +10,8 @@ #include "noise/protocol.h" #endif -#include "esphome/components/socket/socket.h" #include "api_noise_context.h" +#include "esphome/components/socket/socket.h" namespace esphome { namespace api { @@ -67,6 +67,7 @@ class APIFrameHelper { virtual bool can_write_without_blocking() = 0; virtual APIError write_packet(uint16_t type, const uint8_t *data, size_t len) = 0; virtual std::string getpeername() = 0; + virtual int getpeername(struct sockaddr *addr, socklen_t *addrlen) = 0; virtual APIError close() = 0; virtual APIError shutdown(int how) = 0; // Give this helper a name for logging @@ -84,7 +85,10 @@ class APINoiseFrameHelper : public APIFrameHelper { APIError read_packet(ReadPacketBuffer *buffer) override; bool can_write_without_blocking() override; APIError write_packet(uint16_t type, const uint8_t *payload, size_t len) override; - std::string getpeername() override { return socket_->getpeername(); } + std::string getpeername() override { return this->socket_->getpeername(); } + int getpeername(struct sockaddr *addr, socklen_t *addrlen) override { + return this->socket_->getpeername(addr, addrlen); + } APIError close() override; APIError shutdown(int how) override; // Give this helper a name for logging @@ -144,7 +148,10 @@ class APIPlaintextFrameHelper : public APIFrameHelper { APIError read_packet(ReadPacketBuffer *buffer) override; bool can_write_without_blocking() override; APIError write_packet(uint16_t type, const uint8_t *payload, size_t len) override; - std::string getpeername() override { return socket_->getpeername(); } + std::string getpeername() override { return this->socket_->getpeername(); } + int getpeername(struct sockaddr *addr, socklen_t *addrlen) override { + return 
this->socket_->getpeername(addr, addrlen); + } APIError close() override; APIError shutdown(int how) override; // Give this helper a name for logging diff --git a/esphome/components/api/api_pb2.cpp b/esphome/components/api/api_pb2.cpp index 6260020064..334cde16b3 100644 --- a/esphome/components/api/api_pb2.cpp +++ b/esphome/components/api/api_pb2.cpp @@ -407,6 +407,32 @@ const char *proto_enum_to_string<enums::BluetoothDeviceRequestType>(enums::Bluet } } #endif +#ifdef HAS_PROTO_MESSAGE_DUMP +template<> const char *proto_enum_to_string<enums::VoiceAssistantEvent>(enums::VoiceAssistantEvent value) { + switch (value) { + case enums::VOICE_ASSISTANT_ERROR: + return "VOICE_ASSISTANT_ERROR"; + case enums::VOICE_ASSISTANT_RUN_START: + return "VOICE_ASSISTANT_RUN_START"; + case enums::VOICE_ASSISTANT_RUN_END: + return "VOICE_ASSISTANT_RUN_END"; + case enums::VOICE_ASSISTANT_STT_START: + return "VOICE_ASSISTANT_STT_START"; + case enums::VOICE_ASSISTANT_STT_END: + return "VOICE_ASSISTANT_STT_END"; + case enums::VOICE_ASSISTANT_INTENT_START: + return "VOICE_ASSISTANT_INTENT_START"; + case enums::VOICE_ASSISTANT_INTENT_END: + return "VOICE_ASSISTANT_INTENT_END"; + case enums::VOICE_ASSISTANT_TTS_START: + return "VOICE_ASSISTANT_TTS_START"; + case enums::VOICE_ASSISTANT_TTS_END: + return "VOICE_ASSISTANT_TTS_END"; + default: + return "UNKNOWN"; + } +} +#endif bool HelloRequest::decode_varint(uint32_t field_id, ProtoVarInt value) { switch (field_id) { case 2: { @@ -594,6 +620,10 @@ bool DeviceInfoResponse::decode_varint(uint32_t field_id, ProtoVarInt value) { this->bluetooth_proxy_version = value.as_uint32(); return true; } + case 14: { + this->voice_assistant_version = value.as_uint32(); + return true; + } default: return false; } @@ -654,6 +684,7 @@ void DeviceInfoResponse::encode(ProtoWriteBuffer buffer) const { buffer.encode_uint32(11, this->bluetooth_proxy_version); buffer.encode_string(12, this->manufacturer); buffer.encode_string(13, this->friendly_name); + buffer.encode_uint32(14, this->voice_assistant_version); } #ifdef
HAS_PROTO_MESSAGE_DUMP void DeviceInfoResponse::dump_to(std::string &out) const { @@ -712,6 +743,11 @@ void DeviceInfoResponse::dump_to(std::string &out) const { out.append(" friendly_name: "); out.append("'").append(this->friendly_name).append("'"); out.append("\n"); + + out.append(" voice_assistant_version: "); + sprintf(buffer, "%u", this->voice_assistant_version); + out.append(buffer); + out.append("\n"); out.append("}"); } #endif @@ -6111,6 +6147,155 @@ void BluetoothDeviceClearCacheResponse::dump_to(std::string &out) const { out.append("}"); } #endif +bool SubscribeVoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value) { + switch (field_id) { + case 1: { + this->subscribe = value.as_bool(); + return true; + } + default: + return false; + } +} +void SubscribeVoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const { buffer.encode_bool(1, this->subscribe); } +#ifdef HAS_PROTO_MESSAGE_DUMP +void SubscribeVoiceAssistantRequest::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("SubscribeVoiceAssistantRequest {\n"); + out.append(" subscribe: "); + out.append(YESNO(this->subscribe)); + out.append("\n"); + out.append("}"); +} +#endif +bool VoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value) { + switch (field_id) { + case 1: { + this->start = value.as_bool(); + return true; + } + default: + return false; + } +} +void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const { buffer.encode_bool(1, this->start); } +#ifdef HAS_PROTO_MESSAGE_DUMP +void VoiceAssistantRequest::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("VoiceAssistantRequest {\n"); + out.append(" start: "); + out.append(YESNO(this->start)); + out.append("\n"); + out.append("}"); +} +#endif +bool VoiceAssistantResponse::decode_varint(uint32_t field_id, ProtoVarInt value) { + switch (field_id) { + case 1: { + this->port = value.as_uint32(); + return true; + } + case 2: { + 
this->error = value.as_bool(); + return true; + } + default: + return false; + } +} +void VoiceAssistantResponse::encode(ProtoWriteBuffer buffer) const { + buffer.encode_uint32(1, this->port); + buffer.encode_bool(2, this->error); +} +#ifdef HAS_PROTO_MESSAGE_DUMP +void VoiceAssistantResponse::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("VoiceAssistantResponse {\n"); + out.append(" port: "); + sprintf(buffer, "%u", this->port); + out.append(buffer); + out.append("\n"); + + out.append(" error: "); + out.append(YESNO(this->error)); + out.append("\n"); + out.append("}"); +} +#endif +bool VoiceAssistantEventData::decode_length(uint32_t field_id, ProtoLengthDelimited value) { + switch (field_id) { + case 1: { + this->name = value.as_string(); + return true; + } + case 2: { + this->value = value.as_string(); + return true; + } + default: + return false; + } +} +void VoiceAssistantEventData::encode(ProtoWriteBuffer buffer) const { + buffer.encode_string(1, this->name); + buffer.encode_string(2, this->value); +} +#ifdef HAS_PROTO_MESSAGE_DUMP +void VoiceAssistantEventData::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("VoiceAssistantEventData {\n"); + out.append(" name: "); + out.append("'").append(this->name).append("'"); + out.append("\n"); + + out.append(" value: "); + out.append("'").append(this->value).append("'"); + out.append("\n"); + out.append("}"); +} +#endif +bool VoiceAssistantEventResponse::decode_varint(uint32_t field_id, ProtoVarInt value) { + switch (field_id) { + case 1: { + this->event_type = value.as_enum<enums::VoiceAssistantEvent>(); + return true; + } + default: + return false; + } +} +bool VoiceAssistantEventResponse::decode_length(uint32_t field_id, ProtoLengthDelimited value) { + switch (field_id) { + case 2: { + this->data.push_back(value.as_message<VoiceAssistantEventData>()); + return true; + } + default: + return false; + } +} +void VoiceAssistantEventResponse::encode(ProtoWriteBuffer buffer) const { +
buffer.encode_enum<enums::VoiceAssistantEvent>(1, this->event_type); + for (auto &it : this->data) { + buffer.encode_message<VoiceAssistantEventData>(2, it, true); + } +} +#ifdef HAS_PROTO_MESSAGE_DUMP +void VoiceAssistantEventResponse::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("VoiceAssistantEventResponse {\n"); + out.append(" event_type: "); + out.append(proto_enum_to_string<enums::VoiceAssistantEvent>(this->event_type)); + out.append("\n"); + + for (const auto &it : this->data) { + out.append(" data: "); + it.dump_to(out); + out.append("\n"); + } + out.append("}"); +} +#endif } // namespace api } // namespace esphome diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index ade9b9cc8f..9f71c07913 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -165,6 +165,17 @@ enum BluetoothDeviceRequestType : uint32_t { BLUETOOTH_DEVICE_REQUEST_TYPE_CONNECT_V3_WITHOUT_CACHE = 5, BLUETOOTH_DEVICE_REQUEST_TYPE_CLEAR_CACHE = 6, }; +enum VoiceAssistantEvent : uint32_t { + VOICE_ASSISTANT_ERROR = 0, + VOICE_ASSISTANT_RUN_START = 1, + VOICE_ASSISTANT_RUN_END = 2, + VOICE_ASSISTANT_STT_START = 3, + VOICE_ASSISTANT_STT_END = 4, + VOICE_ASSISTANT_INTENT_START = 5, + VOICE_ASSISTANT_INTENT_END = 6, + VOICE_ASSISTANT_TTS_START = 7, + VOICE_ASSISTANT_TTS_END = 8, +}; } // namespace enums @@ -279,6 +290,7 @@ class DeviceInfoResponse : public ProtoMessage { uint32_t bluetooth_proxy_version{0}; std::string manufacturer{}; std::string friendly_name{}; + uint32_t voice_assistant_version{0}; void encode(ProtoWriteBuffer buffer) const override; #ifdef HAS_PROTO_MESSAGE_DUMP void dump_to(std::string &out) const override; @@ -1577,6 +1589,65 @@ class BluetoothDeviceClearCacheResponse : public ProtoMessage { protected: bool decode_varint(uint32_t field_id, ProtoVarInt value) override; }; +class SubscribeVoiceAssistantRequest : public ProtoMessage { + public: + bool subscribe{false}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef
HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_varint(uint32_t field_id, ProtoVarInt value) override; +}; +class VoiceAssistantRequest : public ProtoMessage { + public: + bool start{false}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_varint(uint32_t field_id, ProtoVarInt value) override; +}; +class VoiceAssistantResponse : public ProtoMessage { + public: + uint32_t port{0}; + bool error{false}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_varint(uint32_t field_id, ProtoVarInt value) override; +}; +class VoiceAssistantEventData : public ProtoMessage { + public: + std::string name{}; + std::string value{}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override; +}; +class VoiceAssistantEventResponse : public ProtoMessage { + public: + enums::VoiceAssistantEvent event_type{}; + std::vector<VoiceAssistantEventData> data{}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override; + bool decode_varint(uint32_t field_id, ProtoVarInt value) override; +}; } // namespace api } // namespace esphome diff --git a/esphome/components/api/api_pb2_service.cpp b/esphome/components/api/api_pb2_service.cpp index 7d019e1d3d..df36d0fdea 100644 --- a/esphome/components/api/api_pb2_service.cpp +++ b/esphome/components/api/api_pb2_service.cpp @@ -453,6 +453,20 @@ bool APIServerConnectionBase::send_bluetooth_device_clear_cache_response(const B return
this->send_message_(msg, 88); } #endif +#ifdef USE_VOICE_ASSISTANT +#endif +#ifdef USE_VOICE_ASSISTANT +bool APIServerConnectionBase::send_voice_assistant_request(const VoiceAssistantRequest &msg) { +#ifdef HAS_PROTO_MESSAGE_DUMP + ESP_LOGVV(TAG, "send_voice_assistant_request: %s", msg.dump().c_str()); +#endif + return this->send_message_(msg, 90); +} +#endif +#ifdef USE_VOICE_ASSISTANT +#endif +#ifdef USE_VOICE_ASSISTANT +#endif bool APIServerConnectionBase::read_message(uint32_t msg_size, uint32_t msg_type, uint8_t *msg_data) { switch (msg_type) { case 1: { @@ -827,6 +841,39 @@ bool APIServerConnectionBase::read_message(uint32_t msg_size, uint32_t msg_type, ESP_LOGVV(TAG, "on_unsubscribe_bluetooth_le_advertisements_request: %s", msg.dump().c_str()); #endif this->on_unsubscribe_bluetooth_le_advertisements_request(msg); +#endif + break; + } + case 89: { +#ifdef USE_VOICE_ASSISTANT + SubscribeVoiceAssistantRequest msg; + msg.decode(msg_data, msg_size); +#ifdef HAS_PROTO_MESSAGE_DUMP + ESP_LOGVV(TAG, "on_subscribe_voice_assistant_request: %s", msg.dump().c_str()); +#endif + this->on_subscribe_voice_assistant_request(msg); +#endif + break; + } + case 91: { +#ifdef USE_VOICE_ASSISTANT + VoiceAssistantResponse msg; + msg.decode(msg_data, msg_size); +#ifdef HAS_PROTO_MESSAGE_DUMP + ESP_LOGVV(TAG, "on_voice_assistant_response: %s", msg.dump().c_str()); +#endif + this->on_voice_assistant_response(msg); +#endif + break; + } + case 92: { +#ifdef USE_VOICE_ASSISTANT + VoiceAssistantEventResponse msg; + msg.decode(msg_data, msg_size); +#ifdef HAS_PROTO_MESSAGE_DUMP + ESP_LOGVV(TAG, "on_voice_assistant_event_response: %s", msg.dump().c_str()); +#endif + this->on_voice_assistant_event_response(msg); #endif break; } @@ -1226,6 +1273,19 @@ void APIServerConnection::on_unsubscribe_bluetooth_le_advertisements_request( this->unsubscribe_bluetooth_le_advertisements(msg); } #endif +#ifdef USE_VOICE_ASSISTANT +void APIServerConnection::on_subscribe_voice_assistant_request(const 
SubscribeVoiceAssistantRequest &msg) { + if (!this->is_connection_setup()) { + this->on_no_setup_connection(); + return; + } + if (!this->is_authenticated()) { + this->on_unauthenticated_access(); + return; + } + this->subscribe_voice_assistant(msg); +} +#endif } // namespace api } // namespace esphome diff --git a/esphome/components/api/api_pb2_service.h b/esphome/components/api/api_pb2_service.h index 457a3d28a9..3808f128a4 100644 --- a/esphome/components/api/api_pb2_service.h +++ b/esphome/components/api/api_pb2_service.h @@ -224,6 +224,18 @@ class APIServerConnectionBase : public ProtoService { #endif #ifdef USE_BLUETOOTH_PROXY bool send_bluetooth_device_clear_cache_response(const BluetoothDeviceClearCacheResponse &msg); +#endif +#ifdef USE_VOICE_ASSISTANT + virtual void on_subscribe_voice_assistant_request(const SubscribeVoiceAssistantRequest &value){}; +#endif +#ifdef USE_VOICE_ASSISTANT + bool send_voice_assistant_request(const VoiceAssistantRequest &msg); +#endif +#ifdef USE_VOICE_ASSISTANT + virtual void on_voice_assistant_response(const VoiceAssistantResponse &value){}; +#endif +#ifdef USE_VOICE_ASSISTANT + virtual void on_voice_assistant_event_response(const VoiceAssistantEventResponse &value){}; #endif protected: bool read_message(uint32_t msg_size, uint32_t msg_type, uint8_t *msg_data) override; @@ -306,6 +318,9 @@ class APIServerConnection : public APIServerConnectionBase { #endif #ifdef USE_BLUETOOTH_PROXY virtual void unsubscribe_bluetooth_le_advertisements(const UnsubscribeBluetoothLEAdvertisementsRequest &msg) = 0; +#endif +#ifdef USE_VOICE_ASSISTANT + virtual void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) = 0; #endif protected: void on_hello_request(const HelloRequest &msg) override; @@ -384,6 +399,9 @@ class APIServerConnection : public APIServerConnectionBase { void on_unsubscribe_bluetooth_le_advertisements_request( const UnsubscribeBluetoothLEAdvertisementsRequest &msg) override; #endif +#ifdef USE_VOICE_ASSISTANT + 
void on_subscribe_voice_assistant_request(const SubscribeVoiceAssistantRequest &msg) override; +#endif }; } // namespace api diff --git a/esphome/components/api/api_server.cpp b/esphome/components/api/api_server.cpp index c60766b364..fbef4b253f 100644 --- a/esphome/components/api/api_server.cpp +++ b/esphome/components/api/api_server.cpp @@ -427,5 +427,18 @@ void APIServer::on_shutdown() { delay(10); } +#ifdef USE_VOICE_ASSISTANT +void APIServer::start_voice_assistant() { + for (auto &c : this->clients_) { + c->request_voice_assistant(true); + } +} +void APIServer::stop_voice_assistant() { + for (auto &c : this->clients_) { + c->request_voice_assistant(false); + } +} +#endif + } // namespace api } // namespace esphome diff --git a/esphome/components/api/api_server.h b/esphome/components/api/api_server.h index db87affdb8..30103b2e3f 100644 --- a/esphome/components/api/api_server.h +++ b/esphome/components/api/api_server.h @@ -95,6 +95,11 @@ class APIServer : public Component, public Controller { void request_time(); #endif +#ifdef USE_VOICE_ASSISTANT + void start_voice_assistant(); + void stop_voice_assistant(); +#endif + bool is_connected() const; struct HomeAssistantStateSubscription { diff --git a/esphome/components/i2s_audio/__init__.py b/esphome/components/i2s_audio/__init__.py index e69de29bb2..1773d3082f 100644 --- a/esphome/components/i2s_audio/__init__.py +++ b/esphome/components/i2s_audio/__init__.py @@ -0,0 +1,70 @@ +import esphome.config_validation as cv +import esphome.final_validate as fv +import esphome.codegen as cg + +from esphome import pins +from esphome.const import CONF_ID +from esphome.components.esp32 import get_esp32_variant +from esphome.components.esp32.const import ( + VARIANT_ESP32, + VARIANT_ESP32S2, + VARIANT_ESP32S3, + VARIANT_ESP32C3, +) + +CODEOWNERS = ["@jesserockz"] +DEPENDENCIES = ["esp32"] +MULTI_CONF = True + +CONF_I2S_DOUT_PIN = "i2s_dout_pin" +CONF_I2S_DIN_PIN = "i2s_din_pin" +CONF_I2S_BCLK_PIN = "i2s_bclk_pin" 
+CONF_I2S_LRCLK_PIN = "i2s_lrclk_pin" + +CONF_I2S_AUDIO = "i2s_audio" +CONF_I2S_AUDIO_ID = "i2s_audio_id" + +i2s_audio_ns = cg.esphome_ns.namespace("i2s_audio") +I2SAudioComponent = i2s_audio_ns.class_("I2SAudioComponent", cg.Component) +I2SAudioIn = i2s_audio_ns.class_("I2SAudioIn", cg.Parented.template(I2SAudioComponent)) +I2SAudioOut = i2s_audio_ns.class_( + "I2SAudioOut", cg.Parented.template(I2SAudioComponent) +) + +# https://github.com/espressif/esp-idf/blob/master/components/soc/{variant}/include/soc/soc_caps.h +I2S_PORTS = { + VARIANT_ESP32: 2, + VARIANT_ESP32S2: 1, + VARIANT_ESP32S3: 2, + VARIANT_ESP32C3: 1, +} + +CONFIG_SCHEMA = cv.Schema( + { + cv.GenerateID(): cv.declare_id(I2SAudioComponent), + cv.Required(CONF_I2S_BCLK_PIN): pins.internal_gpio_output_pin_number, + cv.Required(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number, + } +) + + +def _final_validate(_): + i2s_audio_configs = fv.full_config.get()[CONF_I2S_AUDIO] + variant = get_esp32_variant() + if variant not in I2S_PORTS: + raise cv.Invalid(f"Unsupported variant {variant}") + if len(i2s_audio_configs) > I2S_PORTS[variant]: + raise cv.Invalid( + f"Only {I2S_PORTS[variant]} I2S audio ports are supported on {variant}" + ) + + +FINAL_VALIDATE_SCHEMA = _final_validate + + +async def to_code(config): + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) + + cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN])) + cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN])) diff --git a/esphome/components/i2s_audio/i2s_audio.cpp b/esphome/components/i2s_audio/i2s_audio.cpp new file mode 100644 index 0000000000..c1a608c064 --- /dev/null +++ b/esphome/components/i2s_audio/i2s_audio.cpp @@ -0,0 +1,30 @@ +#include "i2s_audio.h" + +#ifdef USE_ESP32 + +#include "esphome/core/log.h" + +namespace esphome { +namespace i2s_audio { + +static const char *const TAG = "i2s_audio"; + +void I2SAudioComponent::setup() { + static i2s_port_t next_port_num = I2S_NUM_0; + + if (next_port_num 
>= I2S_NUM_MAX) { + ESP_LOGE(TAG, "Too many I2S Audio components!"); + this->mark_failed(); + return; + } + + this->port_ = next_port_num; + next_port_num = (i2s_port_t) (next_port_num + 1); + + ESP_LOGCONFIG(TAG, "Setting up I2S Audio..."); +} + +} // namespace i2s_audio +} // namespace esphome + +#endif // USE_ESP32 diff --git a/esphome/components/i2s_audio/i2s_audio.h b/esphome/components/i2s_audio/i2s_audio.h new file mode 100644 index 0000000000..6b3fa10f3c --- /dev/null +++ b/esphome/components/i2s_audio/i2s_audio.h @@ -0,0 +1,64 @@ +#pragma once + +#ifdef USE_ESP32 + +#include <driver/i2s.h> +#include "esphome/core/component.h" +#include "esphome/core/helpers.h" + +namespace esphome { +namespace i2s_audio { + +class I2SAudioComponent; + +class I2SAudioIn : public Parented<I2SAudioComponent> {}; + +class I2SAudioOut : public Parented<I2SAudioComponent> {}; + +class I2SAudioComponent : public Component { + public: + void setup() override; + + void register_audio_in(I2SAudioIn *in) { + this->audio_in_ = in; + in->set_parent(this); + } + void register_audio_out(I2SAudioOut *out) { + this->audio_out_ = out; + out->set_parent(this); + } + + i2s_pin_config_t get_pin_config() const { + return { + .mck_io_num = I2S_PIN_NO_CHANGE, + .bck_io_num = this->bclk_pin_, + .ws_io_num = this->lrclk_pin_, + .data_out_num = I2S_PIN_NO_CHANGE, + .data_in_num = I2S_PIN_NO_CHANGE, + }; + } + + void set_bclk_pin(uint8_t pin) { this->bclk_pin_ = pin; } + void set_lrclk_pin(uint8_t pin) { this->lrclk_pin_ = pin; } + + void lock() { this->lock_.lock(); } + bool try_lock() { return this->lock_.try_lock(); } + void unlock() { this->lock_.unlock(); } + + i2s_port_t get_port() const { return this->port_; } + + protected: + Mutex lock_; + + I2SAudioIn *audio_in_{nullptr}; + I2SAudioOut *audio_out_{nullptr}; + + uint8_t bclk_pin_; + uint8_t lrclk_pin_; + i2s_port_t port_{}; +}; + +} // namespace i2s_audio +} // namespace esphome + +#endif // USE_ESP32 diff --git a/esphome/components/i2s_audio/media_player.py
b/esphome/components/i2s_audio/media_player/__init__.py similarity index 68% rename from esphome/components/i2s_audio/media_player.py rename to esphome/components/i2s_audio/media_player/__init__.py index 43a48a721e..4ccb9cfc0a 100644 --- a/esphome/components/i2s_audio/media_player.py +++ b/esphome/components/i2s_audio/media_player/__init__.py @@ -5,22 +5,25 @@ import esphome.config_validation as cv from esphome import pins from esphome.const import CONF_ID, CONF_MODE -from esphome.core import CORE + +from .. import ( + i2s_audio_ns, + I2SAudioComponent, + I2SAudioOut, + CONF_I2S_AUDIO_ID, + CONF_I2S_DOUT_PIN, +) CODEOWNERS = ["@jesserockz"] -DEPENDENCIES = ["esp32"] - -i2s_audio_ns = cg.esphome_ns.namespace("i2s_audio") +DEPENDENCIES = ["i2s_audio"] I2SAudioMediaPlayer = i2s_audio_ns.class_( - "I2SAudioMediaPlayer", cg.Component, media_player.MediaPlayer + "I2SAudioMediaPlayer", cg.Component, media_player.MediaPlayer, I2SAudioOut ) i2s_dac_mode_t = cg.global_ns.enum("i2s_dac_mode_t") -CONF_I2S_DOUT_PIN = "i2s_dout_pin" -CONF_I2S_BCLK_PIN = "i2s_bclk_pin" -CONF_I2S_LRCLK_PIN = "i2s_lrclk_pin" + CONF_MUTE_PIN = "mute_pin" CONF_AUDIO_ID = "audio_id" CONF_DAC_TYPE = "dac_type" @@ -48,34 +51,26 @@ def validate_esp32_variant(config): CONFIG_SCHEMA = cv.All( cv.typed_schema( { - "internal": cv.Schema( + "internal": media_player.MEDIA_PLAYER_SCHEMA.extend( { cv.GenerateID(): cv.declare_id(I2SAudioMediaPlayer), + cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent), cv.Required(CONF_MODE): cv.enum(INTERNAL_DAC_OPTIONS, lower=True), } - ) - .extend(media_player.MEDIA_PLAYER_SCHEMA) - .extend(cv.COMPONENT_SCHEMA), - "external": cv.Schema( + ).extend(cv.COMPONENT_SCHEMA), + "external": media_player.MEDIA_PLAYER_SCHEMA.extend( { cv.GenerateID(): cv.declare_id(I2SAudioMediaPlayer), + cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent), cv.Required( CONF_I2S_DOUT_PIN ): pins.internal_gpio_output_pin_number, - cv.Required( - CONF_I2S_BCLK_PIN - ): 
pins.internal_gpio_output_pin_number, - cv.Required( - CONF_I2S_LRCLK_PIN - ): pins.internal_gpio_output_pin_number, cv.Optional(CONF_MUTE_PIN): pins.gpio_output_pin_schema, cv.Optional(CONF_MODE, default="mono"): cv.one_of( *EXTERNAL_DAC_OPTIONS, lower=True ), } - ) - .extend(media_player.MEDIA_PLAYER_SCHEMA) - .extend(cv.COMPONENT_SCHEMA), + ).extend(cv.COMPONENT_SCHEMA), }, key=CONF_DAC_TYPE, ), @@ -89,19 +84,19 @@ async def to_code(config): await cg.register_component(var, config) await media_player.register_media_player(var, config) + parent = await cg.get_variable(config[CONF_I2S_AUDIO_ID]) + cg.add(parent.register_audio_out(var)) + if config[CONF_DAC_TYPE] == "internal": cg.add(var.set_internal_dac_mode(config[CONF_MODE])) else: cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN])) - cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN])) - cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN])) if CONF_MUTE_PIN in config: pin = await cg.gpio_pin_expression(config[CONF_MUTE_PIN]) cg.add(var.set_mute_pin(pin)) cg.add(var.set_external_dac_channels(2 if config[CONF_MODE] == "stereo" else 1)) - if CORE.is_esp32: - cg.add_library("WiFiClientSecure", None) - cg.add_library("HTTPClient", None) - cg.add_library("esphome/ESP32-audioI2S", "2.0.6") - cg.add_build_flag("-DAUDIO_NO_SD_FS") + cg.add_library("WiFiClientSecure", None) + cg.add_library("HTTPClient", None) + cg.add_library("esphome/ESP32-audioI2S", "2.0.6") + cg.add_build_flag("-DAUDIO_NO_SD_FS") diff --git a/esphome/components/i2s_audio/i2s_audio_media_player.cpp b/esphome/components/i2s_audio/media_player/i2s_audio_media_player.cpp similarity index 72% rename from esphome/components/i2s_audio/i2s_audio_media_player.cpp rename to esphome/components/i2s_audio/media_player/i2s_audio_media_player.cpp index 2b00a5ec26..64f83a5ea6 100644 --- a/esphome/components/i2s_audio/i2s_audio_media_player.cpp +++ b/esphome/components/i2s_audio/media_player/i2s_audio_media_player.cpp @@ -11,11 +11,19 @@ static const char *const TAG 
= "audio"; void I2SAudioMediaPlayer::control(const media_player::MediaPlayerCall &call) { if (call.get_media_url().has_value()) { - if (this->audio_->isRunning()) - this->audio_->stopSong(); - this->high_freq_.start(); - this->audio_->connecttohost(call.get_media_url().value().c_str()); - this->state = media_player::MEDIA_PLAYER_STATE_PLAYING; + this->current_url_ = call.get_media_url(); + + if (this->state == media_player::MEDIA_PLAYER_STATE_PLAYING && this->audio_ != nullptr) { + if (this->audio_->isRunning()) { + this->audio_->stopSong(); + } + this->audio_->connecttohost(this->current_url_.value().c_str()); + } else { + this->start(); + } + } + if (this->i2s_state_ != I2S_STATE_RUNNING) { + return; } if (call.get_volume().has_value()) { this->volume = call.get_volume().value(); @@ -35,7 +43,7 @@ void I2SAudioMediaPlayer::control(const media_player::MediaPlayerCall &call) { this->state = media_player::MEDIA_PLAYER_STATE_PAUSED; break; case media_player::MEDIA_PLAYER_COMMAND_STOP: - this->stop_(); + this->stop(); break; case media_player::MEDIA_PLAYER_COMMAND_MUTE: this->mute_(); @@ -94,22 +102,51 @@ void I2SAudioMediaPlayer::set_volume_(float volume, bool publish) { this->volume = volume; } -void I2SAudioMediaPlayer::stop_() { - if (this->audio_->isRunning()) - this->audio_->stopSong(); - this->high_freq_.stop(); +void I2SAudioMediaPlayer::setup() { + ESP_LOGCONFIG(TAG, "Setting up Audio..."); this->state = media_player::MEDIA_PLAYER_STATE_IDLE; } -void I2SAudioMediaPlayer::setup() { - ESP_LOGCONFIG(TAG, "Setting up Audio..."); +void I2SAudioMediaPlayer::loop() { + switch (this->i2s_state_) { + case I2S_STATE_STARTING: + this->start_(); + break; + case I2S_STATE_RUNNING: + this->play_(); + break; + case I2S_STATE_STOPPING: + this->stop_(); + break; + case I2S_STATE_STOPPED: + break; + } +} + +void I2SAudioMediaPlayer::play_() { + this->audio_->loop(); + if (this->state == media_player::MEDIA_PLAYER_STATE_PLAYING && !this->audio_->isRunning()) { + this->stop(); + 
} +} + +void I2SAudioMediaPlayer::start() { this->i2s_state_ = I2S_STATE_STARTING; } +void I2SAudioMediaPlayer::start_() { + if (!this->parent_->try_lock()) { + return; // Waiting for another i2s to return lock + } + +#if SOC_I2S_SUPPORTS_DAC + if (this->internal_dac_mode_ != I2S_DAC_CHANNEL_DISABLE) { - this->audio_ = make_unique