Add more VA triggers (#5762)

This commit is contained in:
Keith Burzinski 2023-11-15 21:29:50 -06:00 committed by Jesse Hills
parent c536c976b7
commit 4ac49907ca
No known key found for this signature in database
GPG Key ID: BEAAE804EFD8E83A
3 changed files with 91 additions and 25 deletions

View File

@ -18,20 +18,25 @@ DEPENDENCIES = ["api", "microphone"]
CODEOWNERS = ["@jesserockz"] CODEOWNERS = ["@jesserockz"]
CONF_SILENCE_DETECTION = "silence_detection"
CONF_ON_LISTENING = "on_listening"
CONF_ON_START = "on_start"
CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected"
CONF_ON_STT_END = "on_stt_end"
CONF_ON_TTS_START = "on_tts_start"
CONF_ON_TTS_END = "on_tts_end"
CONF_ON_END = "on_end" CONF_ON_END = "on_end"
CONF_ON_ERROR = "on_error" CONF_ON_ERROR = "on_error"
CONF_ON_INTENT_END = "on_intent_end"
CONF_ON_INTENT_START = "on_intent_start"
CONF_ON_LISTENING = "on_listening"
CONF_ON_START = "on_start"
CONF_ON_STT_END = "on_stt_end"
CONF_ON_STT_VAD_END = "on_stt_vad_end"
CONF_ON_STT_VAD_START = "on_stt_vad_start"
CONF_ON_TTS_END = "on_tts_end"
CONF_ON_TTS_START = "on_tts_start"
CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected"
CONF_SILENCE_DETECTION = "silence_detection"
CONF_USE_WAKE_WORD = "use_wake_word" CONF_USE_WAKE_WORD = "use_wake_word"
CONF_VAD_THRESHOLD = "vad_threshold" CONF_VAD_THRESHOLD = "vad_threshold"
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
CONF_AUTO_GAIN = "auto_gain" CONF_AUTO_GAIN = "auto_gain"
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
CONF_VOLUME_MULTIPLIER = "volume_multiplier" CONF_VOLUME_MULTIPLIER = "volume_multiplier"
@ -88,6 +93,18 @@ CONFIG_SCHEMA = cv.All(
cv.Optional(CONF_ON_CLIENT_DISCONNECTED): automation.validate_automation( cv.Optional(CONF_ON_CLIENT_DISCONNECTED): automation.validate_automation(
single=True single=True
), ),
cv.Optional(CONF_ON_INTENT_START): automation.validate_automation(
single=True
),
cv.Optional(CONF_ON_INTENT_END): automation.validate_automation(
single=True
),
cv.Optional(CONF_ON_STT_VAD_START): automation.validate_automation(
single=True
),
cv.Optional(CONF_ON_STT_VAD_END): automation.validate_automation(
single=True
),
} }
).extend(cv.COMPONENT_SCHEMA), ).extend(cv.COMPONENT_SCHEMA),
) )
@ -177,6 +194,34 @@ async def to_code(config):
config[CONF_ON_CLIENT_DISCONNECTED], config[CONF_ON_CLIENT_DISCONNECTED],
) )
if CONF_ON_INTENT_START in config:
await automation.build_automation(
var.get_intent_start_trigger(),
[],
config[CONF_ON_INTENT_START],
)
if CONF_ON_INTENT_END in config:
await automation.build_automation(
var.get_intent_end_trigger(),
[],
config[CONF_ON_INTENT_END],
)
if CONF_ON_STT_VAD_START in config:
await automation.build_automation(
var.get_stt_vad_start_trigger(),
[],
config[CONF_ON_STT_VAD_START],
)
if CONF_ON_STT_VAD_END in config:
await automation.build_automation(
var.get_stt_vad_end_trigger(),
[],
config[CONF_ON_STT_VAD_END],
)
cg.add_define("USE_VOICE_ASSISTANT") cg.add_define("USE_VOICE_ASSISTANT")

View File

@ -31,7 +31,7 @@ void VoiceAssistant::setup() {
this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
if (socket_ == nullptr) { if (socket_ == nullptr) {
ESP_LOGW(TAG, "Could not create socket."); ESP_LOGW(TAG, "Could not create socket");
this->mark_failed(); this->mark_failed();
return; return;
} }
@ -69,7 +69,7 @@ void VoiceAssistant::setup() {
ExternalRAMAllocator<uint8_t> speaker_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE); ExternalRAMAllocator<uint8_t> speaker_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE); this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE);
if (this->speaker_buffer_ == nullptr) { if (this->speaker_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate speaker buffer."); ESP_LOGW(TAG, "Could not allocate speaker buffer");
this->mark_failed(); this->mark_failed();
return; return;
} }
@ -79,7 +79,7 @@ void VoiceAssistant::setup() {
ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE); ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE); this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE);
if (this->input_buffer_ == nullptr) { if (this->input_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate input buffer."); ESP_LOGW(TAG, "Could not allocate input buffer");
this->mark_failed(); this->mark_failed();
return; return;
} }
@ -89,7 +89,7 @@ void VoiceAssistant::setup() {
this->ring_buffer_ = rb_create(BUFFER_SIZE, sizeof(int16_t)); this->ring_buffer_ = rb_create(BUFFER_SIZE, sizeof(int16_t));
if (this->ring_buffer_ == nullptr) { if (this->ring_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate ring buffer."); ESP_LOGW(TAG, "Could not allocate ring buffer");
this->mark_failed(); this->mark_failed();
return; return;
} }
@ -98,7 +98,7 @@ void VoiceAssistant::setup() {
ExternalRAMAllocator<uint8_t> send_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE); ExternalRAMAllocator<uint8_t> send_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE); this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE);
if (send_buffer_ == nullptr) { if (send_buffer_ == nullptr) {
ESP_LOGW(TAG, "Could not allocate send buffer."); ESP_LOGW(TAG, "Could not allocate send buffer");
this->mark_failed(); this->mark_failed();
return; return;
} }
@ -221,8 +221,8 @@ void VoiceAssistant::loop() {
msg.audio_settings = audio_settings; msg.audio_settings = audio_settings;
if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) { if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) {
ESP_LOGW(TAG, "Could not request start."); ESP_LOGW(TAG, "Could not request start");
this->error_trigger_->trigger("not-connected", "Could not request start."); this->error_trigger_->trigger("not-connected", "Could not request start");
this->continuous_ = false; this->continuous_ = false;
this->set_state_(State::IDLE, State::IDLE); this->set_state_(State::IDLE, State::IDLE);
break; break;
@ -280,7 +280,7 @@ void VoiceAssistant::loop() {
this->speaker_buffer_size_ += len; this->speaker_buffer_size_ += len;
} }
} else { } else {
ESP_LOGW(TAG, "Receive buffer full."); ESP_LOGW(TAG, "Receive buffer full");
} }
if (this->speaker_buffer_size_ > 0) { if (this->speaker_buffer_size_ > 0) {
size_t written = this->speaker_->play(this->speaker_buffer_, this->speaker_buffer_size_); size_t written = this->speaker_->play(this->speaker_buffer_, this->speaker_buffer_size_);
@ -290,7 +290,7 @@ void VoiceAssistant::loop() {
this->speaker_buffer_index_ -= written; this->speaker_buffer_index_ -= written;
this->set_timeout("speaker-timeout", 2000, [this]() { this->speaker_->stop(); }); this->set_timeout("speaker-timeout", 2000, [this]() { this->speaker_->stop(); });
} else { } else {
ESP_LOGW(TAG, "Speaker buffer full."); ESP_LOGW(TAG, "Speaker buffer full");
} }
} }
if (this->wait_for_stream_end_) { if (this->wait_for_stream_end_) {
@ -513,7 +513,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
break; break;
} }
case api::enums::VOICE_ASSISTANT_STT_START: case api::enums::VOICE_ASSISTANT_STT_START:
ESP_LOGD(TAG, "STT Started"); ESP_LOGD(TAG, "STT started");
this->listening_trigger_->trigger(); this->listening_trigger_->trigger();
break; break;
case api::enums::VOICE_ASSISTANT_STT_END: { case api::enums::VOICE_ASSISTANT_STT_END: {
@ -525,19 +525,24 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
} }
} }
if (text.empty()) { if (text.empty()) {
ESP_LOGW(TAG, "No text in STT_END event."); ESP_LOGW(TAG, "No text in STT_END event");
return; return;
} }
ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str()); ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
this->stt_end_trigger_->trigger(text); this->stt_end_trigger_->trigger(text);
break; break;
} }
case api::enums::VOICE_ASSISTANT_INTENT_START:
ESP_LOGD(TAG, "Intent started");
this->intent_start_trigger_->trigger();
break;
case api::enums::VOICE_ASSISTANT_INTENT_END: { case api::enums::VOICE_ASSISTANT_INTENT_END: {
for (auto arg : msg.data) { for (auto arg : msg.data) {
if (arg.name == "conversation_id") { if (arg.name == "conversation_id") {
this->conversation_id_ = std::move(arg.value); this->conversation_id_ = std::move(arg.value);
} }
} }
this->intent_end_trigger_->trigger();
break; break;
} }
case api::enums::VOICE_ASSISTANT_TTS_START: { case api::enums::VOICE_ASSISTANT_TTS_START: {
@ -548,7 +553,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
} }
} }
if (text.empty()) { if (text.empty()) {
ESP_LOGW(TAG, "No text in TTS_START event."); ESP_LOGW(TAG, "No text in TTS_START event");
return; return;
} }
ESP_LOGD(TAG, "Response: \"%s\"", text.c_str()); ESP_LOGD(TAG, "Response: \"%s\"", text.c_str());
@ -566,7 +571,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
} }
} }
if (url.empty()) { if (url.empty()) {
ESP_LOGW(TAG, "No url in TTS_END event."); ESP_LOGW(TAG, "No url in TTS_END event");
return; return;
} }
ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str()); ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
@ -634,6 +639,14 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
this->set_state_(State::RESPONSE_FINISHED, State::IDLE); this->set_state_(State::RESPONSE_FINISHED, State::IDLE);
break; break;
} }
case api::enums::VOICE_ASSISTANT_STT_VAD_START:
ESP_LOGD(TAG, "Starting STT by VAD");
this->stt_vad_start_trigger_->trigger();
break;
case api::enums::VOICE_ASSISTANT_STT_VAD_END:
ESP_LOGD(TAG, "STT by VAD end");
this->stt_vad_end_trigger_->trigger();
break;
default: default:
ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type); ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type);
break; break;

View File

@ -100,13 +100,17 @@ class VoiceAssistant : public Component {
void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; } void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; } void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }
Trigger<> *get_intent_end_trigger() const { return this->intent_end_trigger_; }
Trigger<> *get_intent_start_trigger() const { return this->intent_start_trigger_; }
Trigger<> *get_listening_trigger() const { return this->listening_trigger_; } Trigger<> *get_listening_trigger() const { return this->listening_trigger_; }
Trigger<> *get_end_trigger() const { return this->end_trigger_; }
Trigger<> *get_start_trigger() const { return this->start_trigger_; } Trigger<> *get_start_trigger() const { return this->start_trigger_; }
Trigger<> *get_stt_vad_end_trigger() const { return this->stt_vad_end_trigger_; }
Trigger<> *get_stt_vad_start_trigger() const { return this->stt_vad_start_trigger_; }
Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; } Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; }
Trigger<std::string> *get_stt_end_trigger() const { return this->stt_end_trigger_; } Trigger<std::string> *get_stt_end_trigger() const { return this->stt_end_trigger_; }
Trigger<std::string> *get_tts_start_trigger() const { return this->tts_start_trigger_; }
Trigger<std::string> *get_tts_end_trigger() const { return this->tts_end_trigger_; } Trigger<std::string> *get_tts_end_trigger() const { return this->tts_end_trigger_; }
Trigger<> *get_end_trigger() const { return this->end_trigger_; } Trigger<std::string> *get_tts_start_trigger() const { return this->tts_start_trigger_; }
Trigger<std::string, std::string> *get_error_trigger() const { return this->error_trigger_; } Trigger<std::string, std::string> *get_error_trigger() const { return this->error_trigger_; }
Trigger<> *get_client_connected_trigger() const { return this->client_connected_trigger_; } Trigger<> *get_client_connected_trigger() const { return this->client_connected_trigger_; }
@ -124,13 +128,17 @@ class VoiceAssistant : public Component {
std::unique_ptr<socket::Socket> socket_ = nullptr; std::unique_ptr<socket::Socket> socket_ = nullptr;
struct sockaddr_storage dest_addr_; struct sockaddr_storage dest_addr_;
Trigger<> *intent_end_trigger_ = new Trigger<>();
Trigger<> *intent_start_trigger_ = new Trigger<>();
Trigger<> *listening_trigger_ = new Trigger<>(); Trigger<> *listening_trigger_ = new Trigger<>();
Trigger<> *end_trigger_ = new Trigger<>();
Trigger<> *start_trigger_ = new Trigger<>(); Trigger<> *start_trigger_ = new Trigger<>();
Trigger<> *stt_vad_start_trigger_ = new Trigger<>();
Trigger<> *stt_vad_end_trigger_ = new Trigger<>();
Trigger<> *wake_word_detected_trigger_ = new Trigger<>(); Trigger<> *wake_word_detected_trigger_ = new Trigger<>();
Trigger<std::string> *stt_end_trigger_ = new Trigger<std::string>(); Trigger<std::string> *stt_end_trigger_ = new Trigger<std::string>();
Trigger<std::string> *tts_start_trigger_ = new Trigger<std::string>();
Trigger<std::string> *tts_end_trigger_ = new Trigger<std::string>(); Trigger<std::string> *tts_end_trigger_ = new Trigger<std::string>();
Trigger<> *end_trigger_ = new Trigger<>(); Trigger<std::string> *tts_start_trigger_ = new Trigger<std::string>();
Trigger<std::string, std::string> *error_trigger_ = new Trigger<std::string, std::string>(); Trigger<std::string, std::string> *error_trigger_ = new Trigger<std::string, std::string>();
Trigger<> *client_connected_trigger_ = new Trigger<>(); Trigger<> *client_connected_trigger_ = new Trigger<>();