ESPHome 2026.1.5
Loading...
Searching...
No Matches
voice_assistant.h
Go to the documentation of this file.
1#pragma once
2
4
5#ifdef USE_VOICE_ASSISTANT
6
11
15#ifdef USE_MEDIA_PLAYER
17#endif
18#ifdef USE_MICRO_WAKE_WORD
20#endif
21#ifdef USE_SPEAKER
23#endif
25
26#include <span>
27#include <unordered_map>
28#include <vector>
29
30namespace esphome {
31namespace voice_assistant {
32
// Legacy version numbers; VoiceAssistant::get_legacy_version() returns one of these two values.
33// Version 1: Initial version
34// Version 2: Adds raw speaker support
35static const uint32_t LEGACY_INITIAL_VERSION = 1;
36static const uint32_t LEGACY_SPEAKER_SUPPORT = 2;
37
46
62
67
/// Plain data record for one timer (id, name, total and remaining seconds) plus
/// helpers that render it as text.
/// NOTE(review): to_str() reads this->is_active, but the declaration of is_active is not
/// visible in this listing (viewer line 73 is absent) - confirm against the real header.
68struct Timer {
69 std::string id;
70 std::string name;
71 uint32_t total_seconds;
72 uint32_t seconds_left;
74
 /// Buffer size for to_str() - sufficient for typical timer names.
76 static constexpr size_t TO_STR_BUFFER_SIZE = 128;
 /// Format to buffer, returns pointer to buffer (may truncate long names).
 /// The snprintf call is bounded by buffer.size(), so longer output is cut off rather than
 /// written past the end of the buffer.
78 const char *to_str(std::span<char, TO_STR_BUFFER_SIZE> buffer) const {
79 snprintf(buffer.data(), buffer.size(),
80 "Timer(id=%s, name=%s, total_seconds=%" PRIu32 ", seconds_left=%" PRIu32 ", is_active=%s)",
81 this->id.c_str(), this->name.c_str(), this->total_seconds, this->seconds_left, YESNO(this->is_active));
82 return buffer.data();
83 }
 /// Convenience wrapper: formats into a local TO_STR_BUFFER_SIZE-byte array via to_str()
 /// and returns the result as a std::string (same truncation limit applies).
84 std::string to_string() const {
85 char buffer[TO_STR_BUFFER_SIZE];
86 return this->to_str(buffer);
87 }
88};
89
/// Plain data record for a wake word: an id string, the wake word string, and a list of
/// strings naming its trained languages (the format of the entries is not shown here).
90struct WakeWord {
91 std::string id;
92 std::string wake_word;
93 std::vector<std::string> trained_languages;
94};
95
97 std::vector<WakeWord> available_wake_words;
98 std::vector<std::string> active_wake_words;
100};
101
102#ifdef USE_MEDIA_PLAYER
104 IDLE,
105 URL_SENT,
106 PLAYING,
107 FINISHED,
108};
109#endif
110
/// Voice assistant component (derives from Component).
/// Declares the setup()/loop() overrides, streaming start/stop requests, configuration
/// setters, Trigger accessors and timer bookkeeping.
/// NOTE(review): this listing omits a number of source lines (several member declarations,
/// setter signatures and flag assignments are absent) - consult the real header before
/// relying on anything not shown here.
111class VoiceAssistant : public Component {
112 public:
114
115 void loop() override;
116 void setup() override;
117 float get_setup_priority() const override;
118 void start_streaming();
119 void start_streaming(struct sockaddr_storage *addr, uint16_t port);
120 void failed_to_start();
121
 /// Stores the microphone source pointer; no ownership transfer is visible here.
122 void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
123#ifdef USE_MICRO_WAKE_WORD
125#endif
126#ifdef USE_SPEAKER
 // Body of a setter whose signature line is absent from this listing (presumably
 // set_speaker(speaker::Speaker *speaker) - confirm): stores the pointer and marks
 // output as local.
128 this->speaker_ = speaker;
129 this->local_output_ = true;
130 }
131#endif
132#ifdef USE_MEDIA_PLAYER
 // Body of a setter whose signature line is absent from this listing (presumably
 // set_media_player(media_player::MediaPlayer *media_player) - confirm): stores the
 // pointer and marks output as local.
134 this->media_player_ = media_player;
135 this->local_output_ = true;
136 }
137#endif
138
 /// Returns LEGACY_SPEAKER_SUPPORT when built with USE_SPEAKER and a speaker has been set;
 /// otherwise returns LEGACY_INITIAL_VERSION.
139 uint32_t get_legacy_version() const {
140#ifdef USE_SPEAKER
141 if (this->speaker_ != nullptr) {
142 return LEGACY_SPEAKER_SUPPORT;
143 }
144#endif
145 return LEGACY_INITIAL_VERSION;
146 }
147
 /// Starts from flags = 0 and returns flags after three conditional sections (speaker set,
 /// has_timers_, media player set).
 /// NOTE(review): the statements inside each branch are absent from this listing;
 /// presumably each ORs a feature bit into flags - confirm against the real header.
148 uint32_t get_feature_flags() const {
149 uint32_t flags = 0;
152#ifdef USE_SPEAKER
153 if (this->speaker_ != nullptr) {
155 }
156#endif
157
158 if (this->has_timers_) {
160 }
161
162#ifdef USE_MEDIA_PLAYER
163 if (this->media_player_ != nullptr) {
166 }
167#endif
168
169 return flags;
170 }
171
172 void request_start(bool continuous, bool silence_detection);
173 void request_stop();
174
176 void on_audio(const api::VoiceAssistantAudio &msg);
179 void on_set_configuration(const std::vector<std::string> &active_wake_words);
181
 /// True whenever the current state is anything other than State::IDLE.
182 bool is_running() const { return this->state_ != State::IDLE; }
183 void set_continuous(bool continuous) { this->continuous_ = continuous; }
184 bool is_continuous() const { return this->continuous_; }
185
186 void set_use_wake_word(bool use_wake_word) { this->use_wake_word_ = use_wake_word; }
187
188 void set_noise_suppression_level(uint8_t noise_suppression_level) {
189 this->noise_suppression_level_ = noise_suppression_level;
190 }
191 void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
192 void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }
193 void set_conversation_timeout(uint32_t conversation_timeout) { this->conversation_timeout_ = conversation_timeout; }
195
 // Trigger accessors: each returns the stored trigger pointer unchanged.
200 Trigger<> *get_end_trigger() const { return this->end_trigger_; }
201 Trigger<> *get_start_trigger() const { return this->start_trigger_; }
204#ifdef USE_SPEAKER
207#endif
213 Trigger<> *get_idle_trigger() const { return this->idle_trigger_; }
214
217
218 void client_subscription(api::APIConnection *client, bool subscribe);
220
221 void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; }
222
228 void set_has_timers(bool has_timers) { this->has_timers_ = has_timers; }
 /// Read-only reference to the timer map, keyed by std::string (presumably the timer id - confirm).
229 const std::unordered_map<std::string, Timer> &get_timers() const { return this->timers_; }
230
231 protected:
232 bool allocate_buffers_();
233 void clear_buffers_();
234 void deallocate_buffers_();
235
236 void set_state_(State state);
237 void set_state_(State state, State desired_state);
238 void signal_stop_();
240
241 std::unique_ptr<socket::Socket> socket_ = nullptr;
243
251#ifdef USE_SPEAKER
254#endif
262
265
267
268 std::unordered_map<std::string, Timer> timers_;
269 void timer_tick_();
275 bool has_timers_{false};
277
279#ifdef USE_SPEAKER
280 void write_speaker_();
282 uint8_t *speaker_buffer_{nullptr};
287 bool stream_ended_{false};
288#endif
289#ifdef USE_MEDIA_PLAYER
291 std::string tts_response_url_{""};
293
295#endif
296
297 bool local_output_{false};
298
299 std::string conversation_id_{""};
300
301 std::string wake_word_{""};
302
303 std::shared_ptr<RingBuffer> ring_buffer_;
304
 // NOTE(review): auto_gain_ has no default member initializer here; it is only assigned in
 // set_auto_gain() as far as this listing shows - confirm it is always set before use.
307 uint8_t auto_gain_;
310
311 uint8_t *send_buffer_{nullptr};
312
313 bool continuous_{false};
315
317
320
323 bool start_udp_socket_();
324
326
327#ifdef USE_MICRO_WAKE_WORD
329#endif
330};
331
/// Automation action that starts the voice assistant once (non-continuous).
/// play() sets the parent's wake word from the templatable wake_word value, then calls
/// request_start(false, silence_detection_).
/// NOTE(review): the declaration of silence_detection_ is not visible in this listing
/// (viewer line 344 is absent) - confirm its type and default in the real header.
332template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
333 TEMPLATABLE_VALUE(std::string, wake_word);
334
335 public:
336 void play(const Ts &...x) override {
337 this->parent_->set_wake_word(this->wake_word_.value(x...));
338 this->parent_->request_start(false, this->silence_detection_);
339 }
340
341 void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }
342
343 protected:
345};
346
/// Automation action that calls request_start(true, true) on the parent, i.e. with
/// continuous = true and silence_detection = true. The action arguments are ignored.
347template<typename... Ts> class StartContinuousAction : public Action<Ts...>, public Parented<VoiceAssistant> {
348 public:
349 void play(const Ts &...x) override { this->parent_->request_start(true, true); }
350};
351
/// Automation action that calls request_stop() on the parent. The action arguments are ignored.
352template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
353 public:
354 void play(const Ts &...x) override { this->parent_->request_stop(); }
355};
356
/// Automation condition that holds when the parent reports is_running() or is_continuous().
/// The condition arguments are ignored.
357template<typename... Ts> class IsRunningCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
358 public:
359 bool check(const Ts &...x) override { return this->parent_->is_running() || this->parent_->is_continuous(); }
360};
361
/// Automation condition that holds when the parent's get_api_connection() returns a non-null
/// pointer. The condition arguments are ignored.
362template<typename... Ts> class ConnectedCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
363 public:
364 bool check(const Ts &...x) override { return this->parent_->get_api_connection() != nullptr; }
365};
366
// Declaration only (extern): the pointer itself is defined outside this header.
367extern VoiceAssistant *global_voice_assistant; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
368
369} // namespace voice_assistant
370} // namespace esphome
371
372#endif // USE_VOICE_ASSISTANT
Base class for all automation conditions.
Definition automation.h:217
Helper class to easily give an object a parent of type T.
Definition helpers.h:1246
void play(const Ts &...x) override
void set_silence_detection(bool silence_detection)
void play(const Ts &...x) override
Trigger< std::string > * get_stt_end_trigger() const
std::unique_ptr< socket::Socket > socket_
void set_conversation_timeout(uint32_t conversation_timeout)
Trigger< std::string, std::string > * get_error_trigger() const
Trigger< std::vector< Timer > > * get_timer_tick_trigger() const
std::unordered_map< std::string, Timer > timers_
Trigger< Timer > * get_timer_finished_trigger() const
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Trigger< std::string > * get_tts_end_trigger() const
void on_audio(const api::VoiceAssistantAudio &msg)
Trigger< Timer > * get_timer_updated_trigger() const
media_player::MediaPlayer * media_player_
Trigger< Timer > * get_timer_cancelled_trigger() const
Trigger< std::string, std::string > * error_trigger_
void set_media_player(media_player::MediaPlayer *media_player)
void client_subscription(api::APIConnection *client, bool subscribe)
MediaPlayerResponseState media_player_response_state_
Trigger< std::vector< Timer > > * timer_tick_trigger_
std::shared_ptr< RingBuffer > ring_buffer_
void on_event(const api::VoiceAssistantEventResponse &msg)
Trigger< std::string > * tts_start_trigger_
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
void request_start(bool continuous, bool silence_detection)
void set_speaker(speaker::Speaker *speaker)
api::APIConnection * get_api_connection() const
void set_microphone_source(microphone::MicrophoneSource *mic_source)
void set_wake_word(const std::string &wake_word)
Trigger< Timer > * get_timer_started_trigger() const
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww)
void set_volume_multiplier(float volume_multiplier)
const std::unordered_map< std::string, Timer > & get_timers() const
Trigger< std::string > * intent_progress_trigger_
microphone::MicrophoneSource * mic_source_
micro_wake_word::MicroWakeWord * micro_wake_word_
void set_noise_suppression_level(uint8_t noise_suppression_level)
Trigger< std::string > * get_tts_start_trigger() const
Trigger< std::string > * get_intent_progress_trigger() const
void on_set_configuration(const std::vector< std::string > &active_wake_words)
uint16_t flags
bool state
Definition fan.h:0
VoiceAssistant * global_voice_assistant
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::vector< WakeWord > available_wake_words
std::vector< std::string > active_wake_words
static constexpr size_t TO_STR_BUFFER_SIZE
Buffer size for to_str() - sufficient for typical timer names.
const char * to_str(std::span< char, TO_STR_BUFFER_SIZE > buffer) const
Format to buffer, returns pointer to buffer (may truncate long names)
std::vector< std::string > trained_languages
uint16_t x
Definition tt21100.cpp:5