Remote wake word support for voice assistant (#5229)

This commit is contained in:
Jesse Hills 2023-10-10 19:52:42 +13:00 committed by GitHub
parent 6b96089f02
commit 511af5845e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 744 additions and 211 deletions

View File

@ -1413,6 +1413,18 @@ message SubscribeVoiceAssistantRequest {
bool subscribe = 1;
}
// Flag values for a voice assistant request.
// NOTE(review): presumably these are OR-combined into the uint32 `flags`
// field of VoiceAssistantRequest -- confirm against the callers.
enum VoiceAssistantRequestFlag {
// No flag set.
VOICE_ASSISTANT_REQUEST_NONE = 0;
// NOTE(review): name suggests voice activity detection is requested -- confirm.
VOICE_ASSISTANT_REQUEST_USE_VAD = 1;
// NOTE(review): name suggests wake word detection is requested -- confirm.
VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD = 2;
}
// Audio settings sub-message; carried in VoiceAssistantRequest as the
// `audio_settings` field (field number 4).
// NOTE(review): units and valid ranges of these values are not visible here.
message VoiceAssistantAudioSettings {
uint32 noise_suppression_level = 1;
uint32 auto_gain = 2;
float volume_multiplier = 3;
}
message VoiceAssistantRequest {
option (id) = 90;
option (source) = SOURCE_SERVER;
@ -1420,7 +1432,8 @@ message VoiceAssistantRequest {
bool start = 1;
string conversation_id = 2;
bool use_vad = 3;
uint32 flags = 3;
VoiceAssistantAudioSettings audio_settings = 4;
}
message VoiceAssistantResponse {
@ -1442,6 +1455,10 @@ enum VoiceAssistantEvent {
VOICE_ASSISTANT_INTENT_END = 6;
VOICE_ASSISTANT_TTS_START = 7;
VOICE_ASSISTANT_TTS_END = 8;
VOICE_ASSISTANT_WAKE_WORD_START = 9;
VOICE_ASSISTANT_WAKE_WORD_END = 10;
VOICE_ASSISTANT_STT_VAD_START = 11;
VOICE_ASSISTANT_STT_VAD_END = 12;
}
message VoiceAssistantEventData {

View File

@ -907,21 +907,22 @@ BluetoothConnectionsFreeResponse APIConnection::subscribe_bluetooth_connections_
#endif
#ifdef USE_VOICE_ASSISTANT
bool APIConnection::request_voice_assistant(bool start, const std::string &conversation_id, bool use_vad) {
bool APIConnection::request_voice_assistant(const VoiceAssistantRequest &msg) {
if (!this->voice_assistant_subscription_)
return false;
VoiceAssistantRequest msg;
msg.start = start;
msg.conversation_id = conversation_id;
msg.use_vad = use_vad;
return this->send_voice_assistant_request(msg);
}
void APIConnection::on_voice_assistant_response(const VoiceAssistantResponse &msg) {
if (voice_assistant::global_voice_assistant != nullptr) {
if (msg.error) {
voice_assistant::global_voice_assistant->failed_to_start();
return;
}
struct sockaddr_storage storage;
socklen_t len = sizeof(storage);
this->helper_->getpeername((struct sockaddr *) &storage, &len);
voice_assistant::global_voice_assistant->start(&storage, msg.port);
voice_assistant::global_voice_assistant->start_streaming(&storage, msg.port);
}
};
void APIConnection::on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) {

View File

@ -124,7 +124,7 @@ class APIConnection : public APIServerConnection {
// Records the subscribe flag sent by the client for this connection.
// request_voice_assistant() returns false without sending anything while
// this flag is unset.
void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) override {
this->voice_assistant_subscription_ = msg.subscribe;
}
bool request_voice_assistant(bool start, const std::string &conversation_id, bool use_vad);
bool request_voice_assistant(const VoiceAssistantRequest &msg);
void on_voice_assistant_response(const VoiceAssistantResponse &msg) override;
void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override;
#endif

View File

@ -3,8 +3,6 @@
#include "api_pb2.h"
#include "esphome/core/log.h"
#include <cinttypes>
namespace esphome {
namespace api {
@ -410,6 +408,20 @@ const char *proto_enum_to_string<enums::BluetoothDeviceRequestType>(enums::Bluet
}
#endif
#ifdef HAS_PROTO_MESSAGE_DUMP
// Returns the identifier of a VoiceAssistantRequestFlag value as a string
// literal, or "UNKNOWN" for any value that is not a declared enumerator.
template<> const char *proto_enum_to_string<enums::VoiceAssistantRequestFlag>(enums::VoiceAssistantRequestFlag value) {
  if (value == enums::VOICE_ASSISTANT_REQUEST_NONE) {
    return "VOICE_ASSISTANT_REQUEST_NONE";
  }
  if (value == enums::VOICE_ASSISTANT_REQUEST_USE_VAD) {
    return "VOICE_ASSISTANT_REQUEST_USE_VAD";
  }
  if (value == enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD) {
    return "VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD";
  }
  return "UNKNOWN";
}
#endif
#ifdef HAS_PROTO_MESSAGE_DUMP
template<> const char *proto_enum_to_string<enums::VoiceAssistantEvent>(enums::VoiceAssistantEvent value) {
switch (value) {
case enums::VOICE_ASSISTANT_ERROR:
@ -430,6 +442,14 @@ template<> const char *proto_enum_to_string<enums::VoiceAssistantEvent>(enums::V
return "VOICE_ASSISTANT_TTS_START";
case enums::VOICE_ASSISTANT_TTS_END:
return "VOICE_ASSISTANT_TTS_END";
case enums::VOICE_ASSISTANT_WAKE_WORD_START:
return "VOICE_ASSISTANT_WAKE_WORD_START";
case enums::VOICE_ASSISTANT_WAKE_WORD_END:
return "VOICE_ASSISTANT_WAKE_WORD_END";
case enums::VOICE_ASSISTANT_STT_VAD_START:
return "VOICE_ASSISTANT_STT_VAD_START";
case enums::VOICE_ASSISTANT_STT_VAD_END:
return "VOICE_ASSISTANT_STT_VAD_END";
default:
return "UNKNOWN";
}
@ -524,12 +544,12 @@ void HelloRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" api_version_major: ");
sprintf(buffer, "%" PRIu32, this->api_version_major);
sprintf(buffer, "%u", this->api_version_major);
out.append(buffer);
out.append("\n");
out.append(" api_version_minor: ");
sprintf(buffer, "%" PRIu32, this->api_version_minor);
sprintf(buffer, "%u", this->api_version_minor);
out.append(buffer);
out.append("\n");
out.append("}");
@ -574,12 +594,12 @@ void HelloResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("HelloResponse {\n");
out.append(" api_version_major: ");
sprintf(buffer, "%" PRIu32, this->api_version_major);
sprintf(buffer, "%u", this->api_version_major);
out.append(buffer);
out.append("\n");
out.append(" api_version_minor: ");
sprintf(buffer, "%" PRIu32, this->api_version_minor);
sprintf(buffer, "%u", this->api_version_minor);
out.append(buffer);
out.append("\n");
@ -785,17 +805,17 @@ void DeviceInfoResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" webserver_port: ");
sprintf(buffer, "%" PRIu32, this->webserver_port);
sprintf(buffer, "%u", this->webserver_port);
out.append(buffer);
out.append("\n");
out.append(" legacy_bluetooth_proxy_version: ");
sprintf(buffer, "%" PRIu32, this->legacy_bluetooth_proxy_version);
sprintf(buffer, "%u", this->legacy_bluetooth_proxy_version);
out.append(buffer);
out.append("\n");
out.append(" bluetooth_proxy_feature_flags: ");
sprintf(buffer, "%" PRIu32, this->bluetooth_proxy_feature_flags);
sprintf(buffer, "%u", this->bluetooth_proxy_feature_flags);
out.append(buffer);
out.append("\n");
@ -808,7 +828,7 @@ void DeviceInfoResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" voice_assistant_version: ");
sprintf(buffer, "%" PRIu32, this->voice_assistant_version);
sprintf(buffer, "%u", this->voice_assistant_version);
out.append(buffer);
out.append("\n");
out.append("}");
@ -900,7 +920,7 @@ void ListEntitiesBinarySensorResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -968,7 +988,7 @@ void BinarySensorStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("BinarySensorStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1071,7 +1091,7 @@ void ListEntitiesCoverResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1161,7 +1181,7 @@ void CoverStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("CoverStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1244,7 +1264,7 @@ void CoverCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("CoverCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1364,7 +1384,7 @@ void ListEntitiesFanResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1389,7 +1409,7 @@ void ListEntitiesFanResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" supported_speed_count: ");
sprintf(buffer, "%" PRId32, this->supported_speed_count);
sprintf(buffer, "%d", this->supported_speed_count);
out.append(buffer);
out.append("\n");
@ -1456,7 +1476,7 @@ void FanStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("FanStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1477,7 +1497,7 @@ void FanStateResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" speed_level: ");
sprintf(buffer, "%" PRId32, this->speed_level);
sprintf(buffer, "%d", this->speed_level);
out.append(buffer);
out.append("\n");
out.append("}");
@ -1557,7 +1577,7 @@ void FanCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("FanCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1598,7 +1618,7 @@ void FanCommandRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" speed_level: ");
sprintf(buffer, "%" PRId32, this->speed_level);
sprintf(buffer, "%d", this->speed_level);
out.append(buffer);
out.append("\n");
out.append("}");
@ -1712,7 +1732,7 @@ void ListEntitiesLightResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -1866,7 +1886,7 @@ void LightStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("LightStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2089,7 +2109,7 @@ void LightCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("LightCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2187,7 +2207,7 @@ void LightCommandRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" transition_length: ");
sprintf(buffer, "%" PRIu32, this->transition_length);
sprintf(buffer, "%u", this->transition_length);
out.append(buffer);
out.append("\n");
@ -2196,7 +2216,7 @@ void LightCommandRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" flash_length: ");
sprintf(buffer, "%" PRIu32, this->flash_length);
sprintf(buffer, "%u", this->flash_length);
out.append(buffer);
out.append("\n");
@ -2304,7 +2324,7 @@ void ListEntitiesSensorResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2325,7 +2345,7 @@ void ListEntitiesSensorResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" accuracy_decimals: ");
sprintf(buffer, "%" PRId32, this->accuracy_decimals);
sprintf(buffer, "%d", this->accuracy_decimals);
out.append(buffer);
out.append("\n");
@ -2389,7 +2409,7 @@ void SensorStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("SensorStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2478,7 +2498,7 @@ void ListEntitiesSwitchResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2541,7 +2561,7 @@ void SwitchStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("SwitchStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2580,7 +2600,7 @@ void SwitchCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("SwitchCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2654,7 +2674,7 @@ void ListEntitiesTextSensorResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -2720,7 +2740,7 @@ void TextSensorStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("TextSensorStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3027,7 +3047,7 @@ void GetTimeResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("GetTimeResponse {\n");
out.append(" epoch_seconds: ");
sprintf(buffer, "%" PRIu32, this->epoch_seconds);
sprintf(buffer, "%u", this->epoch_seconds);
out.append(buffer);
out.append("\n");
out.append("}");
@ -3111,7 +3131,7 @@ void ListEntitiesServicesResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3205,7 +3225,7 @@ void ExecuteServiceArgument::dump_to(std::string &out) const {
out.append("\n");
out.append(" legacy_int: ");
sprintf(buffer, "%" PRId32, this->legacy_int);
sprintf(buffer, "%d", this->legacy_int);
out.append(buffer);
out.append("\n");
@ -3219,7 +3239,7 @@ void ExecuteServiceArgument::dump_to(std::string &out) const {
out.append("\n");
out.append(" int_: ");
sprintf(buffer, "%" PRId32, this->int_);
sprintf(buffer, "%d", this->int_);
out.append(buffer);
out.append("\n");
@ -3231,7 +3251,7 @@ void ExecuteServiceArgument::dump_to(std::string &out) const {
for (const auto &it : this->int_array) {
out.append(" int_array: ");
sprintf(buffer, "%" PRId32, it);
sprintf(buffer, "%d", it);
out.append(buffer);
out.append("\n");
}
@ -3282,7 +3302,7 @@ void ExecuteServiceRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("ExecuteServiceRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3358,7 +3378,7 @@ void ListEntitiesCameraResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3424,7 +3444,7 @@ void CameraImageResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("CameraImageResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3616,7 +3636,7 @@ void ListEntitiesClimateResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3804,7 +3824,7 @@ void ClimateStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("ClimateStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -3992,7 +4012,7 @@ void ClimateCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("ClimateCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4175,7 +4195,7 @@ void ListEntitiesNumberResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4262,7 +4282,7 @@ void NumberStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("NumberStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4300,7 +4320,7 @@ void NumberCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("NumberCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4382,7 +4402,7 @@ void ListEntitiesSelectResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4454,7 +4474,7 @@ void SelectStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("SelectStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4497,7 +4517,7 @@ void SelectCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("SelectCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4591,7 +4611,7 @@ void ListEntitiesLockResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4662,7 +4682,7 @@ void LockStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("LockStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4717,7 +4737,7 @@ void LockCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("LockCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4804,7 +4824,7 @@ void ListEntitiesButtonResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4850,7 +4870,7 @@ void ButtonCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("ButtonCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
out.append("}");
@ -4925,7 +4945,7 @@ void ListEntitiesMediaPlayerResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -4994,7 +5014,7 @@ void MediaPlayerStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("MediaPlayerStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -5073,7 +5093,7 @@ void MediaPlayerCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("MediaPlayerCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -5122,7 +5142,7 @@ void SubscribeBluetoothLEAdvertisementsRequest::dump_to(std::string &out) const
__attribute__((unused)) char buffer[64];
out.append("SubscribeBluetoothLEAdvertisementsRequest {\n");
out.append(" flags: ");
sprintf(buffer, "%" PRIu32, this->flags);
sprintf(buffer, "%u", this->flags);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5169,7 +5189,7 @@ void BluetoothServiceData::dump_to(std::string &out) const {
for (const auto &it : this->legacy_data) {
out.append(" legacy_data: ");
sprintf(buffer, "%" PRIu32, it);
sprintf(buffer, "%u", it);
out.append(buffer);
out.append("\n");
}
@ -5249,7 +5269,7 @@ void BluetoothLEAdvertisementResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" rssi: ");
sprintf(buffer, "%" PRId32, this->rssi);
sprintf(buffer, "%d", this->rssi);
out.append(buffer);
out.append("\n");
@ -5272,7 +5292,7 @@ void BluetoothLEAdvertisementResponse::dump_to(std::string &out) const {
}
out.append(" address_type: ");
sprintf(buffer, "%" PRIu32, this->address_type);
sprintf(buffer, "%u", this->address_type);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5322,12 +5342,12 @@ void BluetoothLERawAdvertisement::dump_to(std::string &out) const {
out.append("\n");
out.append(" rssi: ");
sprintf(buffer, "%" PRId32, this->rssi);
sprintf(buffer, "%d", this->rssi);
out.append(buffer);
out.append("\n");
out.append(" address_type: ");
sprintf(buffer, "%" PRIu32, this->address_type);
sprintf(buffer, "%u", this->address_type);
out.append(buffer);
out.append("\n");
@ -5410,7 +5430,7 @@ void BluetoothDeviceRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" address_type: ");
sprintf(buffer, "%" PRIu32, this->address_type);
sprintf(buffer, "%u", this->address_type);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5458,12 +5478,12 @@ void BluetoothDeviceConnectionResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" mtu: ");
sprintf(buffer, "%" PRIu32, this->mtu);
sprintf(buffer, "%u", this->mtu);
out.append(buffer);
out.append("\n");
out.append(" error: ");
sprintf(buffer, "%" PRId32, this->error);
sprintf(buffer, "%d", this->error);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5523,7 +5543,7 @@ void BluetoothGATTDescriptor::dump_to(std::string &out) const {
}
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5579,12 +5599,12 @@ void BluetoothGATTCharacteristic::dump_to(std::string &out) const {
}
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append(" properties: ");
sprintf(buffer, "%" PRIu32, this->properties);
sprintf(buffer, "%u", this->properties);
out.append(buffer);
out.append("\n");
@ -5641,7 +5661,7 @@ void BluetoothGATTService::dump_to(std::string &out) const {
}
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
@ -5748,7 +5768,7 @@ void BluetoothGATTReadRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5793,7 +5813,7 @@ void BluetoothGATTReadResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
@ -5847,7 +5867,7 @@ void BluetoothGATTWriteRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
@ -5889,7 +5909,7 @@ void BluetoothGATTReadDescriptorRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append("}");
@ -5934,7 +5954,7 @@ void BluetoothGATTWriteDescriptorRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
@ -5977,7 +5997,7 @@ void BluetoothGATTNotifyRequest::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
@ -6026,7 +6046,7 @@ void BluetoothGATTNotifyDataResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
@ -6065,12 +6085,12 @@ void BluetoothConnectionsFreeResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("BluetoothConnectionsFreeResponse {\n");
out.append(" free: ");
sprintf(buffer, "%" PRIu32, this->free);
sprintf(buffer, "%u", this->free);
out.append(buffer);
out.append("\n");
out.append(" limit: ");
sprintf(buffer, "%" PRIu32, this->limit);
sprintf(buffer, "%u", this->limit);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6109,12 +6129,12 @@ void BluetoothGATTErrorResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append(" error: ");
sprintf(buffer, "%" PRId32, this->error);
sprintf(buffer, "%d", this->error);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6148,7 +6168,7 @@ void BluetoothGATTWriteResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6182,7 +6202,7 @@ void BluetoothGATTNotifyResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" handle: ");
sprintf(buffer, "%" PRIu32, this->handle);
sprintf(buffer, "%u", this->handle);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6225,7 +6245,7 @@ void BluetoothDevicePairingResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" error: ");
sprintf(buffer, "%" PRId32, this->error);
sprintf(buffer, "%d", this->error);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6268,7 +6288,7 @@ void BluetoothDeviceUnpairingResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" error: ");
sprintf(buffer, "%" PRId32, this->error);
sprintf(buffer, "%d", this->error);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6317,7 +6337,7 @@ void BluetoothDeviceClearCacheResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" error: ");
sprintf(buffer, "%" PRId32, this->error);
sprintf(buffer, "%d", this->error);
out.append(buffer);
out.append("\n");
out.append("}");
@ -6344,6 +6364,56 @@ void SubscribeVoiceAssistantRequest::dump_to(std::string &out) const {
out.append("}");
}
#endif
// Stores a varint-encoded field into this message.
// Field 1 -> noise_suppression_level, field 2 -> auto_gain.
// Returns true when the field id was recognised, false otherwise.
bool VoiceAssistantAudioSettings::decode_varint(uint32_t field_id, ProtoVarInt value) {
  if (field_id == 1) {
    this->noise_suppression_level = value.as_uint32();
    return true;
  }
  if (field_id == 2) {
    this->auto_gain = value.as_uint32();
    return true;
  }
  return false;
}
// Stores a 32-bit encoded field into this message.
// Only field 3 (volume_multiplier, read as a float) is handled; any other
// field id is reported as unrecognised by returning false.
bool VoiceAssistantAudioSettings::decode_32bit(uint32_t field_id, Proto32Bit value) {
  if (field_id != 3) {
    return false;
  }
  this->volume_multiplier = value.as_float();
  return true;
}
// Writes all three fields to the buffer, using the same field ids (1-3)
// that decode_varint() and decode_32bit() accept.
void VoiceAssistantAudioSettings::encode(ProtoWriteBuffer buffer) const {
buffer.encode_uint32(1, this->noise_suppression_level);
buffer.encode_uint32(2, this->auto_gain);
buffer.encode_float(3, this->volume_multiplier);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
// Appends a human-readable rendering of this message to `out`.
//
// The uint32_t fields are formatted with PRIu32 rather than "%u": uint32_t
// is not `unsigned int` on every target, and a mismatched conversion
// specifier is undefined behaviour. snprintf bounds each write to the
// scratch buffer.
void VoiceAssistantAudioSettings::dump_to(std::string &out) const {
  __attribute__((unused)) char buffer[64];
  out.append("VoiceAssistantAudioSettings {\n");

  out.append(" noise_suppression_level: ");
  snprintf(buffer, sizeof(buffer), "%" PRIu32, this->noise_suppression_level);
  out.append(buffer);
  out.append("\n");

  out.append(" auto_gain: ");
  snprintf(buffer, sizeof(buffer), "%" PRIu32, this->auto_gain);
  out.append(buffer);
  out.append("\n");

  out.append(" volume_multiplier: ");
  // The float is promoted to double by the variadic call, which is what %g expects.
  snprintf(buffer, sizeof(buffer), "%g", this->volume_multiplier);
  out.append(buffer);
  out.append("\n");

  out.append("}");
}
#endif
bool VoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value) {
switch (field_id) {
case 1: {
@ -6351,7 +6421,7 @@ bool VoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value)
return true;
}
case 3: {
this->use_vad = value.as_bool();
this->flags = value.as_uint32();
return true;
}
default:
@ -6364,6 +6434,10 @@ bool VoiceAssistantRequest::decode_length(uint32_t field_id, ProtoLengthDelimite
this->conversation_id = value.as_string();
return true;
}
case 4: {
this->audio_settings = value.as_message<VoiceAssistantAudioSettings>();
return true;
}
default:
return false;
}
@ -6371,7 +6445,8 @@ bool VoiceAssistantRequest::decode_length(uint32_t field_id, ProtoLengthDelimite
void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const {
buffer.encode_bool(1, this->start);
buffer.encode_string(2, this->conversation_id);
buffer.encode_bool(3, this->use_vad);
buffer.encode_uint32(3, this->flags);
buffer.encode_message<VoiceAssistantAudioSettings>(4, this->audio_settings);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
void VoiceAssistantRequest::dump_to(std::string &out) const {
@ -6385,8 +6460,13 @@ void VoiceAssistantRequest::dump_to(std::string &out) const {
out.append("'").append(this->conversation_id).append("'");
out.append("\n");
out.append(" use_vad: ");
out.append(YESNO(this->use_vad));
out.append(" flags: ");
sprintf(buffer, "%u", this->flags);
out.append(buffer);
out.append("\n");
out.append(" audio_settings: ");
this->audio_settings.dump_to(out);
out.append("\n");
out.append("}");
}
@ -6414,7 +6494,7 @@ void VoiceAssistantResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("VoiceAssistantResponse {\n");
out.append(" port: ");
sprintf(buffer, "%" PRIu32, this->port);
sprintf(buffer, "%u", this->port);
out.append(buffer);
out.append("\n");
@ -6577,7 +6657,7 @@ void ListEntitiesAlarmControlPanelResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -6602,7 +6682,7 @@ void ListEntitiesAlarmControlPanelResponse::dump_to(std::string &out) const {
out.append("\n");
out.append(" supported_features: ");
sprintf(buffer, "%" PRIu32, this->supported_features);
sprintf(buffer, "%u", this->supported_features);
out.append(buffer);
out.append("\n");
@ -6645,7 +6725,7 @@ void AlarmControlPanelStateResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("AlarmControlPanelStateResponse {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");
@ -6695,7 +6775,7 @@ void AlarmControlPanelCommandRequest::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("AlarmControlPanelCommandRequest {\n");
out.append(" key: ");
sprintf(buffer, "%" PRIu32, this->key);
sprintf(buffer, "%u", this->key);
out.append(buffer);
out.append("\n");

View File

@ -165,6 +165,11 @@ enum BluetoothDeviceRequestType : uint32_t {
BLUETOOTH_DEVICE_REQUEST_TYPE_CONNECT_V3_WITHOUT_CACHE = 5,
BLUETOOTH_DEVICE_REQUEST_TYPE_CLEAR_CACHE = 6,
};
// C++ counterpart of the protobuf enum VoiceAssistantRequestFlag.
// NOTE(review): presumably combined bitwise into VoiceAssistantRequest::flags
// (a uint32_t) -- confirm against the callers.
enum VoiceAssistantRequestFlag : uint32_t {
VOICE_ASSISTANT_REQUEST_NONE = 0,
VOICE_ASSISTANT_REQUEST_USE_VAD = 1,
VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD = 2,
};
enum VoiceAssistantEvent : uint32_t {
VOICE_ASSISTANT_ERROR = 0,
VOICE_ASSISTANT_RUN_START = 1,
@ -175,6 +180,10 @@ enum VoiceAssistantEvent : uint32_t {
VOICE_ASSISTANT_INTENT_END = 6,
VOICE_ASSISTANT_TTS_START = 7,
VOICE_ASSISTANT_TTS_END = 8,
VOICE_ASSISTANT_WAKE_WORD_START = 9,
VOICE_ASSISTANT_WAKE_WORD_END = 10,
VOICE_ASSISTANT_STT_VAD_START = 11,
VOICE_ASSISTANT_STT_VAD_END = 12,
};
enum AlarmControlPanelState : uint32_t {
ALARM_STATE_DISARMED = 0,
@ -1651,11 +1660,26 @@ class SubscribeVoiceAssistantRequest : public ProtoMessage {
protected:
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
// Audio settings sub-message carried in VoiceAssistantRequest::audio_settings.
// The voice assistant component fills it from its configuration before
// requesting a pipeline start.
class VoiceAssistantAudioSettings : public ProtoMessage {
public:
// Requested noise suppression level (the voice_assistant YAML schema limits it to 0..4).
uint32_t noise_suppression_level{0};
// Requested automatic gain (the voice_assistant YAML schema limits it to 0..31, given in dBFS).
uint32_t auto_gain{0};
// Requested volume multiplier. Note the message default is 0.0 while the
// YAML schema default is 1.0, so senders are expected to set it explicitly.
float volume_multiplier{0.0f};
// Serialises the fields into `buffer`.
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
// Appends a human-readable dump of this message to `out`.
void dump_to(std::string &out) const override;
#endif
protected:
// NOTE(review): presumably decodes the float field (volume_multiplier) - confirm in api_pb2.cpp.
bool decode_32bit(uint32_t field_id, Proto32Bit value) override;
// NOTE(review): presumably decodes the two uint32 fields - confirm in api_pb2.cpp.
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
class VoiceAssistantRequest : public ProtoMessage {
public:
bool start{false};
std::string conversation_id{};
bool use_vad{false};
uint32_t flags{0};
VoiceAssistantAudioSettings audio_settings{};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
void dump_to(std::string &out) const override;

View File

@ -1,13 +1,13 @@
#include "api_server.h"
#include <cerrno>
#include "api_connection.h"
#include "esphome/components/network/util.h"
#include "esphome/core/application.h"
#include "esphome/core/defines.h"
#include "esphome/core/hal.h"
#include "esphome/core/log.h"
#include "esphome/core/util.h"
#include "esphome/core/version.h"
#include "esphome/core/hal.h"
#include "esphome/components/network/util.h"
#include <cerrno>
#ifdef USE_LOGGER
#include "esphome/components/logger/logger.h"
@ -323,16 +323,24 @@ void APIServer::on_shutdown() {
}
#ifdef USE_VOICE_ASSISTANT
bool APIServer::start_voice_assistant(const std::string &conversation_id, bool use_vad) {
bool APIServer::start_voice_assistant(const std::string &conversation_id, uint32_t flags,
const api::VoiceAssistantAudioSettings &audio_settings) {
VoiceAssistantRequest msg;
msg.start = true;
msg.conversation_id = conversation_id;
msg.flags = flags;
msg.audio_settings = audio_settings;
for (auto &c : this->clients_) {
if (c->request_voice_assistant(true, conversation_id, use_vad))
if (c->request_voice_assistant(msg))
return true;
}
return false;
}
void APIServer::stop_voice_assistant() {
VoiceAssistantRequest msg;
msg.start = false;
for (auto &c : this->clients_) {
if (c->request_voice_assistant(false, "", false))
if (c->request_voice_assistant(msg))
return;
}
}

View File

@ -1,16 +1,16 @@
#pragma once
#include "api_noise_context.h"
#include "api_pb2.h"
#include "api_pb2_service.h"
#include "esphome/components/socket/socket.h"
#include "esphome/core/component.h"
#include "esphome/core/controller.h"
#include "esphome/core/defines.h"
#include "esphome/core/log.h"
#include "esphome/components/socket/socket.h"
#include "api_pb2.h"
#include "api_pb2_service.h"
#include "list_entities.h"
#include "subscribe_state.h"
#include "user_services.h"
#include "api_noise_context.h"
#include <vector>
@ -81,7 +81,8 @@ class APIServer : public Component, public Controller {
#endif
#ifdef USE_VOICE_ASSISTANT
bool start_voice_assistant(const std::string &conversation_id, bool use_vad);
bool start_voice_assistant(const std::string &conversation_id, uint32_t flags,
const api::VoiceAssistantAudioSettings &audio_settings);
void stop_voice_assistant();
#endif

View File

@ -66,8 +66,9 @@ void I2SAudioMicrophone::start_() {
i2s_set_adc_mode(ADC_UNIT_1, this->adc_channel_);
i2s_adc_enable(this->parent_->get_port());
} else {
} else
#endif
{
if (this->pdm_)
config.mode = (i2s_mode_t) (config.mode | I2S_MODE_PDM);
@ -77,9 +78,7 @@ void I2SAudioMicrophone::start_() {
pin_config.data_in_num = this->din_pin_;
i2s_set_pin(this->parent_->get_port(), &pin_config);
#if SOC_I2S_SUPPORTS_ADC
}
#endif
this->state_ = microphone::STATE_RUNNING;
this->high_freq_.start();
}
@ -110,6 +109,10 @@ size_t I2SAudioMicrophone::read(int16_t *buf, size_t len) {
this->status_set_warning();
return 0;
}
if (bytes_read == 0) {
this->status_set_warning();
return 0;
}
this->status_clear_warning();
if (this->bits_per_sample_ == I2S_BITS_PER_SAMPLE_16BIT) {
return bytes_read;

View File

@ -158,10 +158,10 @@ void I2SAudioSpeaker::watch_() {
this->status_clear_warning();
break;
case TaskEventType::STOPPED:
this->parent_->unlock();
this->state_ = speaker::STATE_STOPPED;
vTaskDelete(this->player_task_handle_);
this->player_task_handle_ = nullptr;
this->parent_->unlock();
break;
case TaskEventType::WARNING:
ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(event.err));
@ -177,9 +177,9 @@ void I2SAudioSpeaker::loop() {
this->start_();
break;
case speaker::STATE_RUNNING:
case speaker::STATE_STOPPING:
this->watch_();
break;
case speaker::STATE_STOPPING:
case speaker::STATE_STOPPED:
break;
}

View File

@ -19,11 +19,18 @@ CODEOWNERS = ["@jesserockz"]
CONF_SILENCE_DETECTION = "silence_detection"
CONF_ON_LISTENING = "on_listening"
CONF_ON_START = "on_start"
CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected"
CONF_ON_STT_END = "on_stt_end"
CONF_ON_TTS_START = "on_tts_start"
CONF_ON_TTS_END = "on_tts_end"
CONF_ON_END = "on_end"
CONF_ON_ERROR = "on_error"
CONF_USE_WAKE_WORD = "use_wake_word"
CONF_VAD_THRESHOLD = "vad_threshold"
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
CONF_AUTO_GAIN = "auto_gain"
CONF_VOLUME_MULTIPLIER = "volume_multiplier"
voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant")
@ -42,23 +49,40 @@ IsRunningCondition = voice_assistant_ns.class_(
"IsRunningCondition", automation.Condition, cg.Parented.template(VoiceAssistant)
)
CONFIG_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.declare_id(VoiceAssistant),
cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(media_player.MediaPlayer),
cv.Optional(CONF_SILENCE_DETECTION, default=True): cv.boolean,
cv.Optional(CONF_ON_LISTENING): automation.validate_automation(single=True),
cv.Optional(CONF_ON_START): automation.validate_automation(single=True),
cv.Optional(CONF_ON_STT_END): automation.validate_automation(single=True),
cv.Optional(CONF_ON_TTS_START): automation.validate_automation(single=True),
cv.Optional(CONF_ON_TTS_END): automation.validate_automation(single=True),
cv.Optional(CONF_ON_END): automation.validate_automation(single=True),
cv.Optional(CONF_ON_ERROR): automation.validate_automation(single=True),
}
).extend(cv.COMPONENT_SCHEMA)
# Validation schema for the `voice_assistant:` YAML block.
# `speaker` and `media_player` share the "output" exclusive group, so at most
# one of them may be configured.
CONFIG_SCHEMA = cv.All(
cv.Schema(
{
cv.GenerateID(): cv.declare_id(VoiceAssistant),
cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
media_player.MediaPlayer
),
# Off by default; forwarded to the C++ component via set_use_wake_word().
cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean,
# No default: only valid when the esp_adf component is present and the
# build uses ESP-IDF; must fit in a uint8.
cv.Optional(CONF_VAD_THRESHOLD): cv.All(
cv.requires_component("esp_adf"), cv.only_with_esp_idf, cv.uint8_t
),
# Integer in the range 0..4.
cv.Optional(CONF_NOISE_SUPPRESSION_LEVEL, default=0): cv.int_range(0, 4),
# Value written with a dBFS unit suffix, then constrained to an integer 0..31.
cv.Optional(CONF_AUTO_GAIN, default="0dBFS"): cv.All(
cv.float_with_unit("decibel full scale", "(dBFS|dbfs|DBFS)"),
cv.int_range(0, 31),
),
# Strictly positive float (0.0 itself is rejected).
cv.Optional(CONF_VOLUME_MULTIPLIER, default=1.0): cv.float_range(
min=0.0, min_included=False
),
# Optional automations; each accepts a single automation.
cv.Optional(CONF_ON_LISTENING): automation.validate_automation(single=True),
cv.Optional(CONF_ON_START): automation.validate_automation(single=True),
cv.Optional(CONF_ON_WAKE_WORD_DETECTED): automation.validate_automation(
single=True
),
cv.Optional(CONF_ON_STT_END): automation.validate_automation(single=True),
cv.Optional(CONF_ON_TTS_START): automation.validate_automation(single=True),
cv.Optional(CONF_ON_TTS_END): automation.validate_automation(single=True),
cv.Optional(CONF_ON_END): automation.validate_automation(single=True),
cv.Optional(CONF_ON_ERROR): automation.validate_automation(single=True),
}
).extend(cv.COMPONENT_SCHEMA),
)
async def to_code(config):
@ -76,7 +100,14 @@ async def to_code(config):
mp = await cg.get_variable(config[CONF_MEDIA_PLAYER])
cg.add(var.set_media_player(mp))
cg.add(var.set_silence_detection(config[CONF_SILENCE_DETECTION]))
cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD]))
if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None:
cg.add(var.set_vad_threshold(vad_threshold))
cg.add(var.set_noise_suppression_level(config[CONF_NOISE_SUPPRESSION_LEVEL]))
cg.add(var.set_auto_gain(config[CONF_AUTO_GAIN]))
cg.add(var.set_volume_multiplier(config[CONF_VOLUME_MULTIPLIER]))
if CONF_ON_LISTENING in config:
await automation.build_automation(
@ -88,6 +119,13 @@ async def to_code(config):
var.get_start_trigger(), [], config[CONF_ON_START]
)
if CONF_ON_WAKE_WORD_DETECTED in config:
await automation.build_automation(
var.get_wake_word_detected_trigger(),
[],
config[CONF_ON_WAKE_WORD_DETECTED],
)
if CONF_ON_STT_END in config:
await automation.build_automation(
var.get_stt_end_trigger(), [(cg.std_string, "x")], config[CONF_ON_STT_END]
@ -128,10 +166,20 @@ VOICE_ASSISTANT_ACTION_SCHEMA = cv.Schema({cv.GenerateID(): cv.use_id(VoiceAssis
StartContinuousAction,
VOICE_ASSISTANT_ACTION_SCHEMA,
)
@register_action("voice_assistant.start", StartAction, VOICE_ASSISTANT_ACTION_SCHEMA)
@register_action(
"voice_assistant.start",
StartAction,
VOICE_ASSISTANT_ACTION_SCHEMA.extend(
{
cv.Optional(CONF_SILENCE_DETECTION, default=True): cv.boolean,
}
),
)
async def voice_assistant_listen_to_code(config, action_id, template_arg, args):
var = cg.new_Pvariable(action_id, template_arg)
await cg.register_parented(var, config[CONF_ID])
if CONF_SILENCE_DETECTION in config:
cg.add(var.set_silence_detection(config[CONF_SILENCE_DETECTION]))
return var

View File

@ -11,6 +11,17 @@ namespace voice_assistant {
static const char *const TAG = "voice_assistant";
#ifdef SAMPLE_RATE_HZ
#undef SAMPLE_RATE_HZ
#endif
static const size_t SAMPLE_RATE_HZ = 16000;
static const size_t INPUT_BUFFER_SIZE = 32 * SAMPLE_RATE_HZ / 1000; // 32ms * 16kHz / 1000ms
static const size_t BUFFER_SIZE = 1000 * SAMPLE_RATE_HZ / 1000; // 1s
static const size_t SEND_BUFFER_SIZE = INPUT_BUFFER_SIZE * sizeof(int16_t);
static const size_t RECEIVE_SIZE = 1024;
static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;
float VoiceAssistant::get_setup_priority() const { return setup_priority::AFTER_CONNECTION; }
void VoiceAssistant::setup() {
@ -47,7 +58,6 @@ void VoiceAssistant::setup() {
this->mark_failed();
return;
}
server.ss_family = AF_INET;
err = socket_->bind((struct sockaddr *) &server, sizeof(server));
if (err != 0) {
@ -55,60 +65,262 @@ void VoiceAssistant::setup() {
this->mark_failed();
return;
}
ExternalRAMAllocator<uint8_t> speaker_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE);
if (this->speaker_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate speaker buffer.");
this->mark_failed();
return;
}
}
#endif
this->mic_->add_data_callback([this](const std::vector<int16_t> &data) {
if (!this->running_) {
return;
ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE);
if (this->input_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate input buffer.");
this->mark_failed();
return;
}
#ifdef USE_ESP_ADF
this->vad_instance_ = vad_create(VAD_MODE_4);
this->ring_buffer_ = rb_create(BUFFER_SIZE, sizeof(int16_t));
if (this->ring_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate ring buffer.");
this->mark_failed();
return;
}
#endif
ExternalRAMAllocator<uint8_t> send_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE);
if (send_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate send buffer.");
this->mark_failed();
return;
}
}
int VoiceAssistant::read_microphone_() {
size_t bytes_read = 0;
if (this->mic_->is_running()) { // Read audio into input buffer
bytes_read = this->mic_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
if (bytes_read == 0) {
memset(this->input_buffer_, 0, INPUT_BUFFER_SIZE * sizeof(int16_t));
return 0;
}
this->socket_->sendto(data.data(), data.size() * sizeof(int16_t), 0, (struct sockaddr *) &this->dest_addr_,
sizeof(this->dest_addr_));
});
#ifdef USE_ESP_ADF
// Write audio into ring buffer
int available = rb_bytes_available(this->ring_buffer_);
if (available < bytes_read) {
rb_read(this->ring_buffer_, nullptr, bytes_read - available, 0);
}
rb_write(this->ring_buffer_, (char *) this->input_buffer_, bytes_read, 0);
#endif
} else {
ESP_LOGD(TAG, "microphone not running");
}
return bytes_read;
}
void VoiceAssistant::loop() {
#ifdef USE_SPEAKER
if (this->speaker_ != nullptr) {
uint8_t buf[1024];
auto len = this->socket_->read(buf, sizeof(buf));
if (len == -1) {
return;
if (this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
this->state_ != State::STOPPING_MICROPHONE && !api::global_api_server->is_connected()) {
if (this->mic_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
} else {
this->set_state_(State::IDLE, State::IDLE);
}
this->speaker_->play(buf, len);
this->set_timeout("data-incoming", 200, [this]() {
if (this->continuous_) {
this->request_start(true);
}
});
this->continuous_ = false;
this->signal_stop_();
return;
}
switch (this->state_) {
case State::IDLE: {
if (this->continuous_ && this->desired_state_ == State::IDLE) {
#ifdef USE_ESP_ADF
if (this->use_wake_word_) {
rb_reset(this->ring_buffer_);
this->set_state_(State::START_MICROPHONE, State::WAIT_FOR_VAD);
} else
#endif
{
this->set_state_(State::START_PIPELINE, State::START_MICROPHONE);
}
} else {
this->high_freq_.stop();
}
break;
}
case State::START_MICROPHONE: {
ESP_LOGD(TAG, "Starting Microphone");
memset(this->send_buffer_, 0, SEND_BUFFER_SIZE);
memset(this->input_buffer_, 0, INPUT_BUFFER_SIZE * sizeof(int16_t));
this->mic_->start();
this->high_freq_.start();
this->set_state_(State::STARTING_MICROPHONE);
break;
}
case State::STARTING_MICROPHONE: {
if (this->mic_->is_running()) {
this->set_state_(this->desired_state_);
}
break;
}
#ifdef USE_ESP_ADF
case State::WAIT_FOR_VAD: {
this->read_microphone_();
ESP_LOGD(TAG, "Waiting for speech...");
this->set_state_(State::WAITING_FOR_VAD);
break;
}
case State::WAITING_FOR_VAD: {
size_t bytes_read = this->read_microphone_();
if (bytes_read > 0) {
vad_state_t vad_state =
vad_process(this->vad_instance_, this->input_buffer_, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
if (vad_state == VAD_SPEECH) {
if (this->vad_counter_ < this->vad_threshold_) {
this->vad_counter_++;
} else {
ESP_LOGD(TAG, "VAD detected speech");
this->set_state_(State::START_PIPELINE, State::STREAMING_MICROPHONE);
}
} else {
if (this->vad_counter_ > 0) {
this->vad_counter_--;
}
}
}
break;
}
#endif
case State::START_PIPELINE: {
this->read_microphone_();
ESP_LOGD(TAG, "Requesting start...");
uint32_t flags = 0;
if (this->use_wake_word_)
flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD;
if (this->silence_detection_)
flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD;
api::VoiceAssistantAudioSettings audio_settings;
audio_settings.noise_suppression_level = this->noise_suppression_level_;
audio_settings.auto_gain = this->auto_gain_;
audio_settings.volume_multiplier = this->volume_multiplier_;
if (!api::global_api_server->start_voice_assistant(this->conversation_id_, flags, audio_settings)) {
ESP_LOGW(TAG, "Could not request start.");
this->error_trigger_->trigger("not-connected", "Could not request start.");
this->continuous_ = false;
this->set_state_(State::IDLE, State::IDLE);
break;
}
this->set_state_(State::STARTING_PIPELINE);
this->set_timeout("reset-conversation_id", 5 * 60 * 1000, [this]() { this->conversation_id_ = ""; });
break;
}
case State::STARTING_PIPELINE: {
this->read_microphone_();
break; // State changed when udp server port received
}
case State::STREAMING_MICROPHONE: {
size_t bytes_read = this->read_microphone_();
#ifdef USE_ESP_ADF
if (rb_bytes_filled(this->ring_buffer_) >= SEND_BUFFER_SIZE) {
rb_read(this->ring_buffer_, (char *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
this->socket_->sendto(this->send_buffer_, SEND_BUFFER_SIZE, 0, (struct sockaddr *) &this->dest_addr_,
sizeof(this->dest_addr_));
}
#else
if (bytes_read > 0) {
this->socket_->sendto(this->input_buffer_, bytes_read, 0, (struct sockaddr *) &this->dest_addr_,
sizeof(this->dest_addr_));
}
#endif
break;
}
case State::STOP_MICROPHONE: {
if (this->mic_->is_running()) {
this->mic_->stop();
this->set_state_(State::STOPPING_MICROPHONE);
} else {
this->set_state_(this->desired_state_);
}
break;
}
case State::STOPPING_MICROPHONE: {
if (this->mic_->is_stopped()) {
this->set_state_(this->desired_state_);
}
break;
}
case State::AWAITING_RESPONSE: {
break; // State changed by events
}
case State::STREAMING_RESPONSE: {
bool playing = false;
#ifdef USE_SPEAKER
if (this->speaker_ != nullptr) {
if (this->speaker_buffer_index_ + RECEIVE_SIZE < SPEAKER_BUFFER_SIZE) {
auto len = this->socket_->read(this->speaker_buffer_ + this->speaker_buffer_index_, RECEIVE_SIZE);
if (len > 0) {
this->speaker_buffer_index_ += len;
this->speaker_buffer_size_ += len;
}
} else {
ESP_LOGW(TAG, "Speaker buffer full.");
}
if (this->speaker_buffer_size_ > 0) {
size_t written = this->speaker_->play(this->speaker_buffer_, this->speaker_buffer_size_);
memmove(this->speaker_buffer_, this->speaker_buffer_ + written, this->speaker_buffer_size_ - written);
this->speaker_buffer_size_ -= written;
this->speaker_buffer_index_ -= written;
}
playing = this->speaker_->is_running();
}
#endif
#ifdef USE_MEDIA_PLAYER
if (this->media_player_ != nullptr) {
if (!this->playing_tts_ ||
this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_PLAYING) {
return;
}
this->set_timeout("playing-media", 1000, [this]() {
this->playing_tts_ = false;
if (this->continuous_) {
this->request_start(true);
if (this->media_player_ != nullptr) {
playing = (this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_PLAYING);
}
});
return;
}
#endif
// Set a 1 second timeout to start the voice assistant again.
this->set_timeout("continuous-no-sound", 1000, [this]() {
if (this->continuous_) {
this->request_start(true);
if (playing) {
this->set_timeout("playing", 100, [this]() { this->set_state_(State::IDLE, State::IDLE); });
}
break;
}
});
default:
break;
}
}
void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) {
ESP_LOGD(TAG, "Starting...");
/// Move the state machine to `state` and log the transition.
/// States are logged as their numeric enum values.
void VoiceAssistant::set_state_(State state) {
  const auto previous = static_cast<uint8_t>(this->state_);
  const auto next = static_cast<uint8_t>(state);
  this->state_ = state;
  ESP_LOGD(TAG, "State changed from %d to %d", previous, next);
}
/// Move the state machine to `state` and record `desired_state` as the state
/// that the intermediate microphone start/stop states hand over to once they
/// complete.
void VoiceAssistant::set_state_(State state, State desired_state) {
  // Delegate the actual transition (and its log line) to the single-argument overload.
  this->set_state_(state);
  this->desired_state_ = desired_state;
  const auto target = static_cast<uint8_t>(desired_state);
  ESP_LOGD(TAG, "Desired state set to %d", target);
}
// Handles a failed pipeline start: logs the failure, fires the error trigger
// with code "failed-to-start", and stops the microphone on the way back to IDLE.
// NOTE(review): presumably invoked by the API connection when the
// VoiceAssistantResponse reports an error - confirm against the caller.
void VoiceAssistant::failed_to_start() {
ESP_LOGE(TAG, "Failed to start server. See Home Assistant logs for more details.");
this->error_trigger_->trigger("failed-to-start", "Failed to start server. See Home Assistant logs for more details.");
// STOP_MICROPHONE hands over to the desired state (IDLE) once the microphone is stopped.
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
}
void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t port) {
if (this->state_ != State::STARTING_PIPELINE) {
this->signal_stop_();
return;
}
ESP_LOGD(TAG, "Client started, streaming microphone");
memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));
if (this->dest_addr_.ss_family == AF_INET) {
@ -123,38 +335,90 @@ void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) {
ESP_LOGW(TAG, "Unknown address family: %d", this->dest_addr_.ss_family);
return;
}
this->running_ = true;
this->mic_->start();
this->listening_trigger_->trigger();
if (this->mic_->is_running()) {
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
} else {
this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
}
}
void VoiceAssistant::request_start(bool continuous) {
ESP_LOGD(TAG, "Requesting start...");
if (!api::global_api_server->start_voice_assistant(this->conversation_id_, this->silence_detection_)) {
ESP_LOGW(TAG, "Could not request start.");
this->error_trigger_->trigger("not-connected", "Could not request start.");
void VoiceAssistant::request_start(bool continuous, bool silence_detection) {
if (!api::global_api_server->is_connected()) {
ESP_LOGE(TAG, "No API client connected");
this->set_state_(State::IDLE, State::IDLE);
this->continuous_ = false;
return;
}
this->continuous_ = continuous;
this->set_timeout("reset-conversation_id", 5 * 60 * 1000, [this]() { this->conversation_id_ = ""; });
if (this->state_ == State::IDLE) {
this->continuous_ = continuous;
this->silence_detection_ = silence_detection;
#ifdef USE_ESP_ADF
if (this->use_wake_word_) {
rb_reset(this->ring_buffer_);
this->set_state_(State::START_MICROPHONE, State::WAIT_FOR_VAD);
} else
#endif
{
this->set_state_(State::START_PIPELINE, State::START_MICROPHONE);
}
}
}
void VoiceAssistant::signal_stop() {
void VoiceAssistant::request_stop() {
this->continuous_ = false;
switch (this->state_) {
case State::IDLE:
break;
case State::START_MICROPHONE:
case State::STARTING_MICROPHONE:
case State::WAIT_FOR_VAD:
case State::WAITING_FOR_VAD:
case State::START_PIPELINE:
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
break;
case State::STARTING_PIPELINE:
case State::STREAMING_MICROPHONE:
this->signal_stop_();
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
break;
case State::STOP_MICROPHONE:
case State::STOPPING_MICROPHONE:
this->desired_state_ = State::IDLE;
break;
case State::AWAITING_RESPONSE:
case State::STREAMING_RESPONSE:
break; // Let the incoming audio stream finish then it will go to idle.
}
}
void VoiceAssistant::signal_stop_() {
ESP_LOGD(TAG, "Signaling stop...");
this->mic_->stop();
this->running_ = false;
api::global_api_server->stop_voice_assistant();
memset(&this->dest_addr_, 0, sizeof(this->dest_addr_));
}
void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
ESP_LOGD(TAG, "Event Type: %d", msg.event_type);
switch (msg.event_type) {
case api::enums::VOICE_ASSISTANT_RUN_START:
ESP_LOGD(TAG, "Assist Pipeline running");
this->start_trigger_->trigger();
break;
case api::enums::VOICE_ASSISTANT_WAKE_WORD_START:
break;
case api::enums::VOICE_ASSISTANT_WAKE_WORD_END: {
ESP_LOGD(TAG, "Wake word detected");
this->wake_word_detected_trigger_->trigger();
break;
}
case api::enums::VOICE_ASSISTANT_STT_START:
ESP_LOGD(TAG, "STT Started");
this->listening_trigger_->trigger();
break;
case api::enums::VOICE_ASSISTANT_STT_END: {
this->set_state_(State::STOP_MICROPHONE, State::AWAITING_RESPONSE);
std::string text;
for (auto arg : msg.data) {
if (arg.name == "text") {
@ -166,7 +430,6 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
return;
}
ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
this->signal_stop();
this->stt_end_trigger_->trigger(text);
break;
}
@ -191,6 +454,9 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
}
ESP_LOGD(TAG, "Response: \"%s\"", text.c_str());
this->tts_start_trigger_->trigger(text);
#ifdef USE_SPEAKER
this->speaker_->start();
#endif
break;
}
case api::enums::VOICE_ASSISTANT_TTS_END: {
@ -207,17 +473,22 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
#ifdef USE_MEDIA_PLAYER
if (this->media_player_ != nullptr) {
this->playing_tts_ = true;
this->media_player_->make_call().set_media_url(url).perform();
}
#endif
State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
this->set_state_(new_state, new_state);
this->tts_end_trigger_->trigger(url);
break;
}
case api::enums::VOICE_ASSISTANT_RUN_END:
case api::enums::VOICE_ASSISTANT_RUN_END: {
ESP_LOGD(TAG, "Assist Pipeline ended");
if (this->state_ != State::STREAMING_RESPONSE && this->state_ != State::IDLE) {
this->set_state_(State::IDLE, State::IDLE);
}
this->end_trigger_->trigger();
break;
}
case api::enums::VOICE_ASSISTANT_ERROR: {
std::string code = "";
std::string message = "";
@ -228,12 +499,20 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
message = std::move(arg.value);
}
}
if (code == "wake-word-timeout" || code == "wake_word_detection_aborted") {
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
return;
}
ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());
this->continuous_ = false;
this->signal_stop();
if (this->state_ != State::IDLE) {
this->signal_stop_();
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
}
this->error_trigger_->trigger(code, message);
break;
}
default:
ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type);
break;
}
}

View File

@ -19,6 +19,11 @@
#endif
#include "esphome/components/socket/socket.h"
#ifdef USE_ESP_ADF
#include <esp_vad.h>
#include <ringbuf.h>
#endif
namespace esphome {
namespace voice_assistant {
@ -28,19 +33,41 @@ namespace voice_assistant {
static const uint32_t INITIAL_VERSION = 1;
static const uint32_t SPEAKER_SUPPORT = 2;
/// States of the voice assistant state machine driven from VoiceAssistant::loop().
/// The numeric values are what appears in the "State changed from %d to %d" log lines.
enum class State {
  IDLE = 0,                  ///< Nothing in progress.
  START_MICROPHONE = 1,      ///< Clear the buffers and start the microphone.
  STARTING_MICROPHONE = 2,   ///< Wait until the microphone reports running, then go to the desired state.
  WAIT_FOR_VAD = 3,          ///< About to start waiting for speech (handled only in ESP-ADF builds).
  WAITING_FOR_VAD = 4,       ///< Run voice activity detection on microphone audio (ESP-ADF builds).
  START_PIPELINE = 5,        ///< Send the start request to the API server.
  STARTING_PIPELINE = 6,     ///< Wait for start_streaming() to provide the destination address.
  STREAMING_MICROPHONE = 7,  ///< Send microphone audio to the destination over the socket.
  STOP_MICROPHONE = 8,       ///< Stop the microphone if it is running.
  STOPPING_MICROPHONE = 9,   ///< Wait until the microphone reports stopped, then go to the desired state.
  AWAITING_RESPONSE = 10,    ///< Wait for pipeline events to change the state.
  STREAMING_RESPONSE = 11,   ///< Play the received response audio / wait for the media player.
};
class VoiceAssistant : public Component {
public:
void setup() override;
void loop() override;
float get_setup_priority() const override;
void start(struct sockaddr_storage *addr, uint16_t port);
void start_streaming(struct sockaddr_storage *addr, uint16_t port);
void failed_to_start();
void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; }
#ifdef USE_SPEAKER
void set_speaker(speaker::Speaker *speaker) { this->speaker_ = speaker; }
void set_speaker(speaker::Speaker *speaker) {
this->speaker_ = speaker;
this->local_output_ = true;
}
#endif
#ifdef USE_MEDIA_PLAYER
void set_media_player(media_player::MediaPlayer *media_player) { this->media_player_ = media_player; }
void set_media_player(media_player::MediaPlayer *media_player) {
this->media_player_ = media_player;
this->local_output_ = true;
}
#endif
uint32_t get_version() const {
@ -52,19 +79,29 @@ class VoiceAssistant : public Component {
return INITIAL_VERSION;
}
void request_start(bool continuous = false);
void signal_stop();
void request_start(bool continuous, bool silence_detection);
void request_stop();
void on_event(const api::VoiceAssistantEventResponse &msg);
bool is_running() const { return this->running_; }
bool is_running() const { return this->state_ != State::IDLE; }
void set_continuous(bool continuous) { this->continuous_ = continuous; }
bool is_continuous() const { return this->continuous_; }
void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }
void set_use_wake_word(bool use_wake_word) { this->use_wake_word_ = use_wake_word; }
#ifdef USE_ESP_ADF
void set_vad_threshold(uint8_t vad_threshold) { this->vad_threshold_ = vad_threshold; }
#endif
void set_noise_suppression_level(uint8_t noise_suppression_level) {
this->noise_suppression_level_ = noise_suppression_level;
}
void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }
Trigger<> *get_listening_trigger() const { return this->listening_trigger_; }
Trigger<> *get_start_trigger() const { return this->start_trigger_; }
Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; }
Trigger<std::string> *get_stt_end_trigger() const { return this->stt_end_trigger_; }
Trigger<std::string> *get_tts_start_trigger() const { return this->tts_start_trigger_; }
Trigger<std::string> *get_tts_end_trigger() const { return this->tts_end_trigger_; }
@ -72,11 +109,17 @@ class VoiceAssistant : public Component {
Trigger<std::string, std::string> *get_error_trigger() const { return this->error_trigger_; }
protected:
int read_microphone_();
void set_state_(State state);
void set_state_(State state, State desired_state);
void signal_stop_();
std::unique_ptr<socket::Socket> socket_ = nullptr;
struct sockaddr_storage dest_addr_;
Trigger<> *listening_trigger_ = new Trigger<>();
Trigger<> *start_trigger_ = new Trigger<>();
Trigger<> *wake_word_detected_trigger_ = new Trigger<>();
Trigger<std::string> *stt_end_trigger_ = new Trigger<std::string>();
Trigger<std::string> *tts_start_trigger_ = new Trigger<std::string>();
Trigger<std::string> *tts_end_trigger_ = new Trigger<std::string>();
@ -86,35 +129,61 @@ class VoiceAssistant : public Component {
microphone::Microphone *mic_{nullptr};
#ifdef USE_SPEAKER
speaker::Speaker *speaker_{nullptr};
uint8_t *speaker_buffer_;
size_t speaker_buffer_index_{0};
size_t speaker_buffer_size_{0};
#endif
#ifdef USE_MEDIA_PLAYER
media_player::MediaPlayer *media_player_{nullptr};
bool playing_tts_{false};
#endif
bool local_output_{false};
std::string conversation_id_{""};
bool running_{false};
HighFrequencyLoopRequester high_freq_;
#ifdef USE_ESP_ADF
vad_handle_t vad_instance_;
ringbuf_handle_t ring_buffer_;
uint8_t vad_threshold_{5};
uint8_t vad_counter_{0};
#endif
// Every member below carries a default member initializer so that none holds an
// indeterminate value before its setter or setup() has run. The defaults match
// the defaults of the voice_assistant YAML schema.

// Whether the wake word flag is sent in the start request.
bool use_wake_word_{false};
// Audio settings forwarded in the start request.
uint8_t noise_suppression_level_{0};
uint8_t auto_gain_{0};
float volume_multiplier_{1.0f};

// Buffers allocated in setup(); they stay null if setup() has not run or allocation failed.
uint8_t *send_buffer_{nullptr};
int16_t *input_buffer_{nullptr};

// True while the assistant should restart itself after each run.
bool continuous_{false};
// Whether the VAD flag is sent in the start request.
bool silence_detection_{true};
State state_{State::IDLE};
State desired_state_{State::IDLE};
};
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
public:
void play(Ts... x) override { this->parent_->request_start(); }
void play(Ts... x) override { this->parent_->request_start(false, this->silence_detection_); }
void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }
protected:
bool silence_detection_;
};
template<typename... Ts> class StartContinuousAction : public Action<Ts...>, public Parented<VoiceAssistant> {
public:
void play(Ts... x) override { this->parent_->request_start(true); }
void play(Ts... x) override { this->parent_->request_start(true, true); }
};
template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
public:
void play(Ts... x) override {
this->parent_->set_continuous(false);
this->parent_->signal_stop();
}
void play(Ts... x) override { this->parent_->request_stop(); }
};
template<typename... Ts> class IsRunningCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {

View File

@ -767,6 +767,9 @@ speaker:
voice_assistant:
microphone: mic_id_external
speaker: speaker_id
on_listening:
- logger.log: "Voice assistant microphone listening"
on_start:
- logger.log: "Voice assistant started"
on_stt_end: