#pragma once #include "esphome/core/defines.h" #ifdef USE_VOICE_ASSISTANT #include "esphome/core/automation.h" #include "esphome/core/component.h" #include "esphome/core/helpers.h" #include "esphome/components/api/api_pb2.h" #include "esphome/components/api/api_server.h" #include "esphome/components/microphone/microphone.h" #ifdef USE_SPEAKER #include "esphome/components/speaker/speaker.h" #endif #ifdef USE_MEDIA_PLAYER #include "esphome/components/media_player/media_player.h" #endif #include "esphome/components/socket/socket.h" namespace esphome { namespace voice_assistant { // Version 1: Initial version // Version 2: Adds raw speaker support // Version 3: Unused/skip static const uint32_t INITIAL_VERSION = 1; static const uint32_t SPEAKER_SUPPORT = 2; class VoiceAssistant : public Component { public: void setup() override; void loop() override; float get_setup_priority() const override; void start(struct sockaddr_storage *addr, uint16_t port); void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; } #ifdef USE_SPEAKER void set_speaker(speaker::Speaker *speaker) { this->speaker_ = speaker; } #endif #ifdef USE_MEDIA_PLAYER void set_media_player(media_player::MediaPlayer *media_player) { this->media_player_ = media_player; } #endif uint32_t get_version() const { #ifdef USE_SPEAKER if (this->speaker_ != nullptr) { return SPEAKER_SUPPORT; } #endif return INITIAL_VERSION; } void request_start(bool continuous = false); void signal_stop(); void on_event(const api::VoiceAssistantEventResponse &msg); bool is_running() const { return this->running_; } void set_continuous(bool continuous) { this->continuous_ = continuous; } bool is_continuous() const { return this->continuous_; } void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; } Trigger<> *get_listening_trigger() const { return this->listening_trigger_; } Trigger<> *get_start_trigger() const { return this->start_trigger_; } Trigger *get_stt_end_trigger() const { return this->stt_end_trigger_; } Trigger *get_tts_start_trigger() const { return this->tts_start_trigger_; } Trigger *get_tts_end_trigger() const { return this->tts_end_trigger_; } Trigger<> *get_end_trigger() const { return this->end_trigger_; } Trigger *get_error_trigger() const { return this->error_trigger_; } protected: std::unique_ptr socket_ = nullptr; struct sockaddr_storage dest_addr_; Trigger<> *listening_trigger_ = new Trigger<>(); Trigger<> *start_trigger_ = new Trigger<>(); Trigger *stt_end_trigger_ = new Trigger(); Trigger *tts_start_trigger_ = new Trigger(); Trigger *tts_end_trigger_ = new Trigger(); Trigger<> *end_trigger_ = new Trigger<>(); Trigger *error_trigger_ = new Trigger(); microphone::Microphone *mic_{nullptr}; #ifdef USE_SPEAKER speaker::Speaker *speaker_{nullptr}; #endif #ifdef USE_MEDIA_PLAYER media_player::MediaPlayer *media_player_{nullptr}; bool playing_tts_{false}; #endif std::string conversation_id_{""}; bool running_{false}; bool continuous_{false}; bool silence_detection_; }; template class StartAction : public Action, public Parented { public: void play(Ts... x) override { this->parent_->request_start(); } }; template class StartContinuousAction : public Action, public Parented { public: void play(Ts... x) override { this->parent_->request_start(true); } }; template class StopAction : public Action, public Parented { public: void play(Ts... x) override { this->parent_->set_continuous(false); this->parent_->signal_stop(); } }; template class IsRunningCondition : public Condition, public Parented { public: bool check(Ts... x) override { return this->parent_->is_running() || this->parent_->is_continuous(); } }; extern VoiceAssistant *global_voice_assistant; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) } // namespace voice_assistant } // namespace esphome #endif // USE_VOICE_ASSISTANT