ESPHome 2025.6.3
Loading...
Searching...
No Matches
voice_assistant.h
Go to the documentation of this file.
1#pragma once
2
4
5#ifdef USE_VOICE_ASSISTANT
6
11
15#ifdef USE_MEDIA_PLAYER
17#endif
18#ifdef USE_MICRO_WAKE_WORD
20#endif
21#ifdef USE_SPEAKER
23#endif
25
26#include <unordered_map>
27#include <vector>
28
29namespace esphome {
30namespace voice_assistant {
31
// Legacy version numbers reported by VoiceAssistant::get_legacy_version().
32// Version 1: Initial version
33// Version 2: Adds raw speaker support
// Returned when no speaker is configured (or USE_SPEAKER is not defined).
34static const uint32_t LEGACY_INITIAL_VERSION = 1;
// Returned when a speaker pointer has been set.
35static const uint32_t LEGACY_SPEAKER_SUPPORT = 2;
36
45
61
66
// Plain record describing one timer: identifier, display name, and its total / remaining duration.
// NOTE(review): to_string() reads this->is_active, but that member's declaration is on a line this
// listing omits (the embedded numbering jumps from 71 to 73) — confirm against the real header.
67struct Timer {
68 std::string id;
69 std::string name;
70 uint32_t total_seconds;
71 uint32_t seconds_left;
73
// Returns a single-line, human-readable dump of every field, built with str_sprintf.
// is_active is rendered through the YESNO() macro.
74 std::string to_string() const {
75 return str_sprintf("Timer(id=%s, name=%s, total_seconds=%" PRIu32 ", seconds_left=%" PRIu32 ", is_active=%s)",
76 this->id.c_str(), this->name.c_str(), this->total_seconds, this->seconds_left,
77 YESNO(this->is_active));
78 }
79};
80
// Plain record describing one wake word.
81struct WakeWord {
// Identifier of the wake word.
82 std::string id;
// The wake word itself (presumably the spoken phrase — TODO confirm).
83 std::string wake_word;
// Languages associated with this wake word, as strings; the exact code format is not visible here.
84 std::vector<std::string> trained_languages;
85};
86
88 std::vector<WakeWord> available_wake_words;
89 std::vector<std::string> active_wake_words;
91};
92
// Component exposing the voice assistant's control, configuration and timer API, plus its internal state.
// NOTE(review): this listing omits many source lines (the embedded line numbers have gaps), so several
// members used below — e.g. state_, speaker_, media_player_, the trigger pointers — are declared on
// lines that are not shown here. Comments describe only what the visible lines establish.
93class VoiceAssistant : public Component {
94 public:
96
// Component overrides; implemented outside this header.
97 void loop() override;
98 void setup() override;
99 float get_setup_priority() const override;
// Two overloads: one without arguments and one taking an explicit socket address and port.
100 void start_streaming();
101 void start_streaming(struct sockaddr_storage *addr, uint16_t port);
102 void failed_to_start();
103
// Stores the microphone source pointer; no ownership transfer is visible here.
104 void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
105#ifdef USE_MICRO_WAKE_WORD
107#endif
108#ifdef USE_SPEAKER
// Body of a setter whose signature line is omitted from this listing (presumably set_speaker — TODO confirm):
// stores the speaker pointer and marks local output as available.
110 this->speaker_ = speaker;
111 this->local_output_ = true;
112 }
113#endif
114#ifdef USE_MEDIA_PLAYER
// Body of a setter whose signature line is omitted from this listing (presumably set_media_player — TODO confirm):
// stores the media player pointer and marks local output as available.
116 this->media_player_ = media_player;
117 this->local_output_ = true;
118 }
119#endif
120
// Returns LEGACY_SPEAKER_SUPPORT when built with USE_SPEAKER and a speaker has been set,
// otherwise LEGACY_INITIAL_VERSION.
121 uint32_t get_legacy_version() const {
122#ifdef USE_SPEAKER
123 if (this->speaker_ != nullptr) {
124 return LEGACY_SPEAKER_SUPPORT;
125 }
126#endif
127 return LEGACY_INITIAL_VERSION;
128 }
129
// Builds a bitmask starting from 0, with conditional sections for a configured speaker, enabled timers,
// and a configured media player. The statements that actually set bits are on lines omitted from this
// listing, so the individual flag values cannot be documented from here.
130 uint32_t get_feature_flags() const {
131 uint32_t flags = 0;
134#ifdef USE_SPEAKER
135 if (this->speaker_ != nullptr) {
137 }
138#endif
139
140 if (this->has_timers_) {
142 }
143
144#ifdef USE_MEDIA_PLAYER
145 if (this->media_player_ != nullptr) {
148 }
149#endif
150
151 return flags;
152 }
153
// Start/stop requests; implemented outside this header.
154 void request_start(bool continuous, bool silence_detection);
155 void request_stop();
156
// Handlers for incoming API messages / configuration; implemented outside this header.
158 void on_audio(const api::VoiceAssistantAudio &msg);
161 void on_set_configuration(const std::vector<std::string> &active_wake_words);
163
// True whenever the internal state is anything other than State::IDLE.
164 bool is_running() const { return this->state_ != State::IDLE; }
165 void set_continuous(bool continuous) { this->continuous_ = continuous; }
166 bool is_continuous() const { return this->continuous_; }
167
168 void set_use_wake_word(bool use_wake_word) { this->use_wake_word_ = use_wake_word; }
169
// Simple setters for audio-processing and timeout settings; units and valid ranges are not visible here.
170 void set_noise_suppression_level(uint8_t noise_suppression_level) {
171 this->noise_suppression_level_ = noise_suppression_level;
172 }
173 void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
174 void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }
175 void set_conversation_timeout(uint32_t conversation_timeout) { this->conversation_timeout_ = conversation_timeout; }
177
// Accessors for argument-less automation triggers; further trigger getters are on omitted lines.
182 Trigger<> *get_end_trigger() const { return this->end_trigger_; }
183 Trigger<> *get_start_trigger() const { return this->start_trigger_; }
186#ifdef USE_SPEAKER
189#endif
195 Trigger<> *get_idle_trigger() const { return this->idle_trigger_; }
196
199
// Subscribes or unsubscribes an API client, selected by `subscribe`; implemented outside this header.
200 void client_subscription(api::APIConnection *client, bool subscribe);
202
// Stores a copy of the given wake word string.
203 void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; }
204
// Enables or disables the timer feature bit checked in get_feature_flags().
210 void set_has_timers(bool has_timers) { this->has_timers_ = has_timers; }
// Read-only view of the timers map; keys are strings (presumably timer ids — TODO confirm).
211 const std::unordered_map<std::string, Timer> &get_timers() const { return this->timers_; }
212
213 protected:
// Buffer management helpers; allocate_buffers_() reports a bool result (presumably success — TODO confirm).
214 bool allocate_buffers_();
215 void clear_buffers_();
216 void deallocate_buffers_();
217
// State transitions: one overload also takes a desired follow-up state.
218 void set_state_(State state);
219 void set_state_(State state, State desired_state);
220 void signal_stop_();
222
// Owned socket, initially null.
223 std::unique_ptr<socket::Socket> socket_ = nullptr;
225
233#ifdef USE_SPEAKER
236#endif
244
247
249
// Timer storage and bookkeeping.
250 std::unordered_map<std::string, Timer> timers_;
251 void timer_tick_();
257 bool has_timers_{false};
259
261#ifdef USE_SPEAKER
// Speaker output path state; buffer size and ownership are not visible in this listing.
262 void write_speaker_();
264 uint8_t *speaker_buffer_{nullptr};
269 bool stream_ended_{false};
270#endif
271#ifdef USE_MEDIA_PLAYER
273 std::string tts_response_url_{""};
277#endif
278
// Set to true by the speaker / media player setters above.
279 bool local_output_{false};
280
281 std::string conversation_id_{""};
282
283 std::string wake_word_{""};
284
285 std::shared_ptr<RingBuffer> ring_buffer_;
286
// NOTE(review): unlike the neighbouring members this field has no in-class initializer; in the visible code
// it is only assigned through set_auto_gain() — confirm it is always set before being read.
289 uint8_t auto_gain_;
292
// Raw buffer pointer, null until allocated; allocation site is not visible here.
293 uint8_t *send_buffer_{nullptr};
294
295 bool continuous_{false};
297
299
302
// Returns a bool result (presumably whether the UDP socket was started — TODO confirm).
305 bool start_udp_socket_();
306
308
309#ifdef USE_MICRO_WAKE_WORD
311#endif
312};
313
// Automation action that sets the parent's wake word and then requests a non-continuous start.
// NOTE(review): the silence_detection_ member used below is declared on a line this listing omits
// (embedded line 326) — confirm its type and default against the real header.
314template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
// Declares the templatable `wake_word` value read in play().
315 TEMPLATABLE_VALUE(std::string, wake_word);
316
317 public:
// Evaluates the wake word with the action arguments, stores it on the parent, then calls
// request_start(continuous = false, silence_detection = this->silence_detection_).
318 void play(Ts... x) override {
319 this->parent_->set_wake_word(this->wake_word_.value(x...));
320 this->parent_->request_start(false, this->silence_detection_);
321 }
322
// Stores the silence-detection flag forwarded to request_start() by play().
323 void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }
324
325 protected:
327};
328
329template<typename... Ts> class StartContinuousAction : public Action<Ts...>, public Parented<VoiceAssistant> {
330 public:
331 void play(Ts... x) override { this->parent_->request_start(true, true); }
332};
333
334template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
335 public:
336 void play(Ts... x) override { this->parent_->request_stop(); }
337};
338
339template<typename... Ts> class IsRunningCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
340 public:
341 bool check(Ts... x) override { return this->parent_->is_running() || this->parent_->is_continuous(); }
342};
343
344template<typename... Ts> class ConnectedCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
345 public:
346 bool check(Ts... x) override { return this->parent_->get_api_connection() != nullptr; }
347};
348
// Declaration only: the global VoiceAssistant pointer is defined in another translation unit.
349extern VoiceAssistant *global_voice_assistant; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
350
351} // namespace voice_assistant
352} // namespace esphome
353
354#endif // USE_VOICE_ASSISTANT
Base class for all automation conditions.
Definition automation.h:75
Helper class to easily give an object a parent of type T.
Definition helpers.h:539
void set_silence_detection(bool silence_detection)
Trigger< std::string > * get_stt_end_trigger() const
std::unique_ptr< socket::Socket > socket_
void set_conversation_timeout(uint32_t conversation_timeout)
Trigger< std::string, std::string > * get_error_trigger() const
Trigger< std::vector< Timer > > * get_timer_tick_trigger() const
std::unordered_map< std::string, Timer > timers_
Trigger< Timer > * get_timer_finished_trigger() const
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Trigger< std::string > * get_tts_end_trigger() const
void on_audio(const api::VoiceAssistantAudio &msg)
Trigger< Timer > * get_timer_updated_trigger() const
media_player::MediaPlayer * media_player_
Trigger< Timer > * get_timer_cancelled_trigger() const
Trigger< std::string, std::string > * error_trigger_
void set_media_player(media_player::MediaPlayer *media_player)
void client_subscription(api::APIConnection *client, bool subscribe)
Trigger< std::vector< Timer > > * timer_tick_trigger_
std::shared_ptr< RingBuffer > ring_buffer_
void on_event(const api::VoiceAssistantEventResponse &msg)
Trigger< std::string > * tts_start_trigger_
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
void request_start(bool continuous, bool silence_detection)
void set_speaker(speaker::Speaker *speaker)
api::APIConnection * get_api_connection() const
void set_microphone_source(microphone::MicrophoneSource *mic_source)
void set_wake_word(const std::string &wake_word)
Trigger< Timer > * get_timer_started_trigger() const
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww)
void set_volume_multiplier(float volume_multiplier)
const std::unordered_map< std::string, Timer > & get_timers() const
Trigger< std::string > * intent_progress_trigger_
microphone::MicrophoneSource * mic_source_
micro_wake_word::MicroWakeWord * micro_wake_word_
void set_noise_suppression_level(uint8_t noise_suppression_level)
Trigger< std::string > * get_tts_start_trigger() const
Trigger< std::string > * get_intent_progress_trigger() const
void on_set_configuration(const std::vector< std::string > &active_wake_words)
bool state
Definition fan.h:0
VoiceAssistant * global_voice_assistant
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::string str_sprintf(const char *fmt,...)
Definition helpers.cpp:323
std::vector< WakeWord > available_wake_words
std::vector< std::string > active_wake_words
std::vector< std::string > trained_languages
uint16_t x
Definition tt21100.cpp:5