ESPHome 2025.9.0-dev
Loading...
Searching...
No Matches
voice_assistant.h
Go to the documentation of this file.
1#pragma once
2
4
5#ifdef USE_VOICE_ASSISTANT
6
11
15#ifdef USE_MEDIA_PLAYER
17#endif
18#ifdef USE_MICRO_WAKE_WORD
20#endif
21#ifdef USE_SPEAKER
23#endif
25
26#include <unordered_map>
27#include <vector>
28
29namespace esphome {
30namespace voice_assistant {
31
// Legacy protocol version numbers returned by VoiceAssistant::get_legacy_version():
// LEGACY_SPEAKER_SUPPORT when built with USE_SPEAKER and a speaker is set,
// LEGACY_INITIAL_VERSION otherwise.
32// Version 1: Initial version
33// Version 2: Adds raw speaker support
34static const uint32_t LEGACY_INITIAL_VERSION = 1;
35static const uint32_t LEGACY_SPEAKER_SUPPORT = 2;
36
45
61
66
// Plain record describing one timer: identifier, display name, and the total /
// remaining duration in seconds.
// NOTE(review): to_string() reads this->is_active, but its declaration (listing
// line 72) is absent from this extract - confirm against the real header.
67struct Timer {
68 std::string id;
69 std::string name;
70 uint32_t total_seconds;
71 uint32_t seconds_left;
73
 // Returns a one-line textual description of the timer built with str_sprintf;
 // is_active is rendered through the YESNO macro.
74 std::string to_string() const {
75 return str_sprintf("Timer(id=%s, name=%s, total_seconds=%" PRIu32 ", seconds_left=%" PRIu32 ", is_active=%s)",
76 this->id.c_str(), this->name.c_str(), this->total_seconds, this->seconds_left,
77 YESNO(this->is_active));
78 }
79};
80
// Plain record describing one wake word: an identifier, the wake word text and
// a list of language strings (presumably the languages the model was trained
// on - verify against the producer of this data).
81struct WakeWord {
82 std::string id;
83 std::string wake_word;
84 std::vector<std::string> trained_languages;
85};
86
// NOTE(review): the opening line of this aggregate (listing line 87) and two
// further members (listing lines 90 and earlier) are absent from this extract,
// so its type name is not visible here.
// Visible members: the list of WakeWord records that are available, and the
// ids/names of the ones that are active (presumably - confirm key semantics).
88 std::vector<WakeWord> available_wake_words;
89 std::vector<std::string> active_wake_words;
91};
92
// Only compiled when USE_MEDIA_PLAYER is defined.
// NOTE(review): the opening line of this definition (listing line 94) and the
// enumerators on listing lines 96 and 98 are absent from this extract; only
// IDLE and PLAYING are visible. Presumably this is the enum used for
// media_player_response_state_ - confirm against the real header.
93#ifdef USE_MEDIA_PLAYER
95 IDLE,
97 PLAYING,
99};
100#endif
101
// Voice assistant component (derives from Component).
// NOTE(review): this text is an extract of a rendered source listing; many
// numbered lines of the original header are absent, so several members that
// are referenced below (state_, speaker_, media_player_, mic_source_, the
// trigger pointers, ...) are not declared anywhere in view.
102class VoiceAssistant : public Component {
103 public:
105
 // Component overrides.
106 void loop() override;
107 void setup() override;
108 float get_setup_priority() const override;
 // Two overloads: one without arguments, one taking an explicit socket
 // address and port.
109 void start_streaming();
110 void start_streaming(struct sockaddr_storage *addr, uint16_t port);
111 void failed_to_start();
112
 // Stores the microphone source pointer; ownership is not expressed here.
113 void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
114#ifdef USE_MICRO_WAKE_WORD
116#endif
 // NOTE(review): the signature line (listing 118) is missing from this
 // extract; the visible body stores `speaker` into speaker_ and marks the
 // component as having local output.
117#ifdef USE_SPEAKER
119 this->speaker_ = speaker;
120 this->local_output_ = true;
121 }
122#endif
 // NOTE(review): the signature line (listing 124) is missing from this
 // extract; the visible body stores `media_player` into media_player_ and
 // marks the component as having local output.
123#ifdef USE_MEDIA_PLAYER
125 this->media_player_ = media_player;
126 this->local_output_ = true;
127 }
128#endif
129
 // Returns LEGACY_SPEAKER_SUPPORT when compiled with USE_SPEAKER and
 // speaker_ is non-null; otherwise returns LEGACY_INITIAL_VERSION.
130 uint32_t get_legacy_version() const {
131#ifdef USE_SPEAKER
132 if (this->speaker_ != nullptr) {
133 return LEGACY_SPEAKER_SUPPORT;
134 }
135#endif
136 return LEGACY_INITIAL_VERSION;
137 }
138
 // Builds and returns a bit mask starting from 0. The conditions that guard
 // each flag are visible (speaker set, has_timers_, media player set), but
 // NOTE(review): the statements that actually set bits (listing lines
 // 141-142, 145, 150, 155-156) are absent from this extract.
139 uint32_t get_feature_flags() const {
140 uint32_t flags = 0;
143#ifdef USE_SPEAKER
144 if (this->speaker_ != nullptr) {
146 }
147#endif
148
149 if (this->has_timers_) {
151 }
152
153#ifdef USE_MEDIA_PLAYER
154 if (this->media_player_ != nullptr) {
157 }
158#endif
159
160 return flags;
161 }
162
 // Start/stop requests; implemented outside this header.
163 void request_start(bool continuous, bool silence_detection);
164 void request_stop();
165
 // Handlers taking API messages / configuration; implemented outside this header.
167 void on_audio(const api::VoiceAssistantAudio &msg);
170 void on_set_configuration(const std::vector<std::string> &active_wake_words);
172
 // Running means state_ is anything other than State::IDLE.
173 bool is_running() const { return this->state_ != State::IDLE; }
174 void set_continuous(bool continuous) { this->continuous_ = continuous; }
175 bool is_continuous() const { return this->continuous_; }
176
177 void set_use_wake_word(bool use_wake_word) { this->use_wake_word_ = use_wake_word; }
178
 // Simple setters that copy the argument into the matching member.
179 void set_noise_suppression_level(uint8_t noise_suppression_level) {
180 this->noise_suppression_level_ = noise_suppression_level;
181 }
182 void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
183 void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }
184 void set_conversation_timeout(uint32_t conversation_timeout) { this->conversation_timeout_ = conversation_timeout; }
186
 // Trigger accessors; each returns the stored trigger pointer unchanged.
 // NOTE(review): further accessors (listing lines 187-190, 193-203) are
 // absent from this extract.
191 Trigger<> *get_end_trigger() const { return this->end_trigger_; }
192 Trigger<> *get_start_trigger() const { return this->start_trigger_; }
195#ifdef USE_SPEAKER
198#endif
204 Trigger<> *get_idle_trigger() const { return this->idle_trigger_; }
205
208
209 void client_subscription(api::APIConnection *client, bool subscribe);
211
 // Copies the given string into wake_word_.
212 void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; }
213
219 void set_has_timers(bool has_timers) { this->has_timers_ = has_timers; }
 // Read-only view of the timer map (keyed by std::string; presumably the
 // timer id - confirm against the code that fills timers_).
220 const std::unordered_map<std::string, Timer> &get_timers() const { return this->timers_; }
221
222 protected:
 // Buffer management helpers; implemented outside this header.
223 bool allocate_buffers_();
224 void clear_buffers_();
225 void deallocate_buffers_();
226
 // State transitions; the two-argument overload additionally takes a
 // desired state.
227 void set_state_(State state);
228 void set_state_(State state, State desired_state);
229 void signal_stop_();
231
 // Uniquely owned socket, initially empty.
232 std::unique_ptr<socket::Socket> socket_ = nullptr;
234
242#ifdef USE_SPEAKER
245#endif
253
256
258
259 std::unordered_map<std::string, Timer> timers_;
260 void timer_tick_();
266 bool has_timers_{false};
268
270#ifdef USE_SPEAKER
271 void write_speaker_();
 // Raw pointer, null by default; allocation/release is not visible in this
 // header.
273 uint8_t *speaker_buffer_{nullptr};
278 bool stream_ended_{false};
279#endif
280#ifdef USE_MEDIA_PLAYER
282 std::string tts_response_url_{""};
284
286#endif
287
 // Set to true by the speaker / media player setter bodies above.
288 bool local_output_{false};
289
290 std::string conversation_id_{""};
291
292 std::string wake_word_{""};
293
294 std::shared_ptr<RingBuffer> ring_buffer_;
295
 // NOTE(review): unlike the neighbouring members, auto_gain_ has no default
 // member initializer here; it is only assigned through set_auto_gain().
 // Confirm it is always set before being read.
298 uint8_t auto_gain_;
301
 // Raw pointer, null by default; allocation/release is not visible in this
 // header.
302 uint8_t *send_buffer_{nullptr};
303
304 bool continuous_{false};
306
308
311
314 bool start_udp_socket_();
315
317
318#ifdef USE_MICRO_WAKE_WORD
320#endif
321};
322
// Automation action that starts the parent VoiceAssistant in non-continuous mode.
// NOTE(review): the declaration of silence_detection_ (listing line 335) is
// absent from this extract.
323template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
 // Templatable std::string value; read in play() through wake_word_.value(x...).
324 TEMPLATABLE_VALUE(std::string, wake_word);
325
326 public:
 // Passes the evaluated wake word to the parent, then requests a start with
 // continuous=false and the stored silence-detection setting.
327 void play(Ts... x) override {
328 this->parent_->set_wake_word(this->wake_word_.value(x...));
329 this->parent_->request_start(false, this->silence_detection_);
330 }
331
332 void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }
333
334 protected:
336};
337
// Automation action that asks the parent VoiceAssistant to start with
// continuous=true and silence_detection=true. The action arguments are unused.
338template<typename... Ts> class StartContinuousAction : public Action<Ts...>, public Parented<VoiceAssistant> {
339 public:
340 void play(Ts... x) override { this->parent_->request_start(true, true); }
341};
342
// Automation action that calls request_stop() on the parent VoiceAssistant.
// The action arguments are unused.
343template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
344 public:
345 void play(Ts... x) override { this->parent_->request_stop(); }
346};
347
// Automation condition that holds when the parent VoiceAssistant reports
// is_running() or is_continuous(). The condition arguments are unused.
348template<typename... Ts> class IsRunningCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
349 public:
350 bool check(Ts... x) override { return this->parent_->is_running() || this->parent_->is_continuous(); }
351};
352
// Automation condition that holds when the parent's get_api_connection()
// returns a non-null pointer. The condition arguments are unused.
353template<typename... Ts> class ConnectedCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
354 public:
355 bool check(Ts... x) override { return this->parent_->get_api_connection() != nullptr; }
356};
357
// Declaration only (extern): the global VoiceAssistant pointer is defined in
// another translation unit.
358extern VoiceAssistant *global_voice_assistant; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
359
360} // namespace voice_assistant
361} // namespace esphome
362
363#endif // USE_VOICE_ASSISTANT
Base class for all automation conditions.
Definition automation.h:124
Helper class to easily give an object a parent of type T.
Definition helpers.h:656
void set_silence_detection(bool silence_detection)
Trigger< std::string > * get_stt_end_trigger() const
std::unique_ptr< socket::Socket > socket_
void set_conversation_timeout(uint32_t conversation_timeout)
Trigger< std::string, std::string > * get_error_trigger() const
Trigger< std::vector< Timer > > * get_timer_tick_trigger() const
std::unordered_map< std::string, Timer > timers_
Trigger< Timer > * get_timer_finished_trigger() const
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Trigger< std::string > * get_tts_end_trigger() const
void on_audio(const api::VoiceAssistantAudio &msg)
Trigger< Timer > * get_timer_updated_trigger() const
media_player::MediaPlayer * media_player_
Trigger< Timer > * get_timer_cancelled_trigger() const
Trigger< std::string, std::string > * error_trigger_
void set_media_player(media_player::MediaPlayer *media_player)
void client_subscription(api::APIConnection *client, bool subscribe)
MediaPlayerResponseState media_player_response_state_
Trigger< std::vector< Timer > > * timer_tick_trigger_
std::shared_ptr< RingBuffer > ring_buffer_
void on_event(const api::VoiceAssistantEventResponse &msg)
Trigger< std::string > * tts_start_trigger_
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
void request_start(bool continuous, bool silence_detection)
void set_speaker(speaker::Speaker *speaker)
api::APIConnection * get_api_connection() const
void set_microphone_source(microphone::MicrophoneSource *mic_source)
void set_wake_word(const std::string &wake_word)
Trigger< Timer > * get_timer_started_trigger() const
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww)
void set_volume_multiplier(float volume_multiplier)
const std::unordered_map< std::string, Timer > & get_timers() const
Trigger< std::string > * intent_progress_trigger_
microphone::MicrophoneSource * mic_source_
micro_wake_word::MicroWakeWord * micro_wake_word_
void set_noise_suppression_level(uint8_t noise_suppression_level)
Trigger< std::string > * get_tts_start_trigger() const
Trigger< std::string > * get_intent_progress_trigger() const
void on_set_configuration(const std::vector< std::string > &active_wake_words)
bool state
Definition fan.h:0
VoiceAssistant * global_voice_assistant
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::string str_sprintf(const char *fmt,...)
Definition helpers.cpp:208
std::vector< WakeWord > available_wake_words
std::vector< std::string > active_wake_words
std::vector< std::string > trained_languages
uint16_t x
Definition tt21100.cpp:5