ESPHome 2025.5.0
Loading...
Searching...
No Matches
voice_assistant.h
Go to the documentation of this file.
1#pragma once
2
4
5#ifdef USE_VOICE_ASSISTANT
6
11
15#ifdef USE_MEDIA_PLAYER
17#endif
18#ifdef USE_MICRO_WAKE_WORD
20#endif
21#ifdef USE_SPEAKER
23#endif
25
26#include <unordered_map>
27#include <vector>
28
29namespace esphome {
30namespace voice_assistant {
31
// Legacy version numbers:
//   1 - initial version
//   2 - adds raw speaker support
static const uint32_t LEGACY_INITIAL_VERSION{1};
static const uint32_t LEGACY_SPEAKER_SUPPORT{2};
36
45
61
66
// Timer record with an id, a name and two second counters, plus a helper that renders it as text.
// NOTE(review): to_string() reads this->is_active, but no is_active member is visible in this listing
// (the embedded numbering skips line 72) - confirm against the full header.
67struct Timer {
68 std::string id;
69 std::string name;
70 uint32_t total_seconds;
71 uint32_t seconds_left;
73
 // Returns a single "Timer(...)" string built with str_sprintf from id, name, total_seconds,
 // seconds_left and is_active (the latter passed through YESNO()).
74 std::string to_string() const {
75 return str_sprintf("Timer(id=%s, name=%s, total_seconds=%" PRIu32 ", seconds_left=%" PRIu32 ", is_active=%s)",
76 this->id.c_str(), this->name.c_str(), this->total_seconds, this->seconds_left,
77 YESNO(this->is_active));
78 }
79};
80
// Plain data record for a wake word: two strings (id, wake_word) and a list of strings
// (trained_languages). It has no member functions.
81struct WakeWord {
82 std::string id;
83 std::string wake_word;
 // presumably the languages this wake word was trained on - TODO confirm against the producer of this data
84 std::vector<std::string> trained_languages;
85};
86
88 std::vector<WakeWord> available_wake_words;
89 std::vector<std::string> active_wake_words;
91};
92
// Voice assistant component (derives from Component).
// NOTE(review): the embedded listing numbers skip in many places (e.g. 94 -> 96, 108 -> 110, 114 -> 116),
// so a number of declarations and function headers that belong to this class are not visible here.
93class VoiceAssistant : public Component {
94 public:
96
 // Overrides of the Component interface.
97 void loop() override;
98 void setup() override;
99 float get_setup_priority() const override;
 // Two overloads: one without arguments and one taking an explicit address and port.
100 void start_streaming();
101 void start_streaming(struct sockaddr_storage *addr, uint16_t port);
102 void failed_to_start();
103
 // Stores the microphone source pointer; no ownership transfer is visible here.
104 void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
105#ifdef USE_MICRO_WAKE_WORD
107#endif
108#ifdef USE_SPEAKER
 // NOTE(review): the function header for this body is not visible in this listing. The visible
 // statements store `speaker` and mark local output as enabled.
110 this->speaker_ = speaker;
111 this->local_output_ = true;
112 }
113#endif
114#ifdef USE_MEDIA_PLAYER
 // NOTE(review): the function header for this body is not visible in this listing. The visible
 // statements store `media_player` and mark local output as enabled.
116 this->media_player_ = media_player;
117 this->local_output_ = true;
118 }
119#endif
120
 // Returns LEGACY_SPEAKER_SUPPORT when built with USE_SPEAKER and a speaker pointer is set;
 // otherwise returns LEGACY_INITIAL_VERSION.
121 uint32_t get_legacy_version() const {
122#ifdef USE_SPEAKER
123 if (this->speaker_ != nullptr) {
124 return LEGACY_SPEAKER_SUPPORT;
125 }
126#endif
127 return LEGACY_INITIAL_VERSION;
128 }
129
 // Returns a flag word that starts at 0. The conditions checked are: a speaker being set
 // (USE_SPEAKER builds), has_timers_, and a media player being set (USE_MEDIA_PLAYER builds).
 // NOTE(review): the statements inside each branch are not visible in this listing.
130 uint32_t get_feature_flags() const {
131 uint32_t flags = 0;
134#ifdef USE_SPEAKER
135 if (this->speaker_ != nullptr) {
137 }
138#endif
139
140 if (this->has_timers_) {
142 }
143
144#ifdef USE_MEDIA_PLAYER
145 if (this->media_player_ != nullptr) {
148 }
149#endif
150
151 return flags;
152 }
153
154 void request_start(bool continuous, bool silence_detection);
155 void request_stop();
156
 // Handlers taking API messages / configuration; their definitions are outside this header listing.
158 void on_audio(const api::VoiceAssistantAudio &msg);
161 void on_set_configuration(const std::vector<std::string> &active_wake_words);
163
 // True whenever the current state is anything other than State::IDLE.
164 bool is_running() const { return this->state_ != State::IDLE; }
165 void set_continuous(bool continuous) { this->continuous_ = continuous; }
166 bool is_continuous() const { return this->continuous_; }
167
168 void set_use_wake_word(bool use_wake_word) { this->use_wake_word_ = use_wake_word; }
169
 // Simple setters: each one copies its argument into the matching member.
170 void set_noise_suppression_level(uint8_t noise_suppression_level) {
171 this->noise_suppression_level_ = noise_suppression_level;
172 }
173 void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
174 void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }
175 void set_conversation_timeout(uint32_t conversation_timeout) { this->conversation_timeout_ = conversation_timeout; }
177
 // Trigger accessors: each returns the stored trigger pointer unchanged.
181 Trigger<> *get_end_trigger() const { return this->end_trigger_; }
182 Trigger<> *get_start_trigger() const { return this->start_trigger_; }
185#ifdef USE_SPEAKER
188#endif
194 Trigger<> *get_idle_trigger() const { return this->idle_trigger_; }
195
198
199 void client_subscription(api::APIConnection *client, bool subscribe);
201
 // Copies the given string into wake_word_.
202 void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; }
203
209 void set_has_timers(bool has_timers) { this->has_timers_ = has_timers; }
 // Returns a const reference to the internal timer map (no copy is made).
210 const std::unordered_map<std::string, Timer> &get_timers() const { return this->timers_; }
211
212 protected:
213 bool allocate_buffers_();
214 void clear_buffers_();
215 void deallocate_buffers_();
216
 // Two overloads: one taking only the new state, one taking a state plus a desired state.
217 void set_state_(State state);
218 void set_state_(State state, State desired_state);
219 void signal_stop_();
221
 // Uniquely owned socket; starts out null.
222 std::unique_ptr<socket::Socket> socket_ = nullptr;
224
232#ifdef USE_SPEAKER
235#endif
242
245
247
 // Timers stored in a map with std::string keys.
 // presumably keyed by Timer::id - TODO confirm in the implementation file
248 std::unordered_map<std::string, Timer> timers_;
249 void timer_tick_();
255 bool has_timers_{false};
257
259#ifdef USE_SPEAKER
260 void write_speaker_();
 // Raw byte pointer, null by default; allocation and release are not visible in this listing.
262 uint8_t *speaker_buffer_{nullptr};
267 bool stream_ended_{false};
268#endif
269#ifdef USE_MEDIA_PLAYER
273#endif
274
 // Set to true by the visible speaker / media player setter bodies above.
275 bool local_output_{false};
276
277 std::string conversation_id_{""};
278
279 std::string wake_word_{""};
280
281 std::shared_ptr<RingBuffer> ring_buffer_;
282
 // No in-class initializer here; the value is assigned through set_auto_gain().
285 uint8_t auto_gain_;
288
 // Raw byte pointer, null by default; allocation and release are not visible in this listing.
289 uint8_t *send_buffer_{nullptr};
290
291 bool continuous_{false};
293
295
298
301 bool start_udp_socket_();
302
304
305#ifdef USE_MICRO_WAKE_WORD
307#endif
308};
309
// Automation action that sets the parent's wake word and then requests a non-continuous start.
// NOTE(review): play() and set_silence_detection() use this->silence_detection_, whose declaration is
// not visible in this listing (the embedded numbering skips line 322).
310template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
311 TEMPLATABLE_VALUE(std::string, wake_word);
312
313 public:
 // Evaluates the templatable wake_word with the automation arguments, passes the result to
 // set_wake_word(), then calls request_start(false, silence_detection_) on the parent.
314 void play(Ts... x) override {
315 this->parent_->set_wake_word(this->wake_word_.value(x...));
316 this->parent_->request_start(false, this->silence_detection_);
317 }
318
 // Stores the flag that play() later forwards as request_start()'s second argument.
319 void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }
320
321 protected:
323};
324
325template<typename... Ts> class StartContinuousAction : public Action<Ts...>, public Parented<VoiceAssistant> {
326 public:
327 void play(Ts... x) override { this->parent_->request_start(true, true); }
328};
329
330template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
331 public:
332 void play(Ts... x) override { this->parent_->request_stop(); }
333};
334
335template<typename... Ts> class IsRunningCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
336 public:
337 bool check(Ts... x) override { return this->parent_->is_running() || this->parent_->is_continuous(); }
338};
339
340template<typename... Ts> class ConnectedCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
341 public:
342 bool check(Ts... x) override { return this->parent_->get_api_connection() != nullptr; }
343};
344
345extern VoiceAssistant *global_voice_assistant; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
346
347} // namespace voice_assistant
348} // namespace esphome
349
350#endif // USE_VOICE_ASSISTANT
Base class for all automation conditions.
Definition automation.h:75
Helper class to easily give an object a parent of type T.
Definition helpers.h:538
void set_silence_detection(bool silence_detection)
Trigger< std::string > * get_stt_end_trigger() const
std::unique_ptr< socket::Socket > socket_
void set_conversation_timeout(uint32_t conversation_timeout)
Trigger< std::string, std::string > * get_error_trigger() const
Trigger< std::vector< Timer > > * get_timer_tick_trigger() const
std::unordered_map< std::string, Timer > timers_
Trigger< Timer > * get_timer_finished_trigger() const
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Trigger< std::string > * get_tts_end_trigger() const
void on_audio(const api::VoiceAssistantAudio &msg)
Trigger< Timer > * get_timer_updated_trigger() const
media_player::MediaPlayer * media_player_
Trigger< Timer > * get_timer_cancelled_trigger() const
Trigger< std::string, std::string > * error_trigger_
void set_media_player(media_player::MediaPlayer *media_player)
void client_subscription(api::APIConnection *client, bool subscribe)
Trigger< std::vector< Timer > > * timer_tick_trigger_
std::shared_ptr< RingBuffer > ring_buffer_
void on_event(const api::VoiceAssistantEventResponse &msg)
Trigger< std::string > * tts_start_trigger_
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
void request_start(bool continuous, bool silence_detection)
void set_speaker(speaker::Speaker *speaker)
api::APIConnection * get_api_connection() const
void set_microphone_source(microphone::MicrophoneSource *mic_source)
void set_wake_word(const std::string &wake_word)
Trigger< Timer > * get_timer_started_trigger() const
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww)
void set_volume_multiplier(float volume_multiplier)
const std::unordered_map< std::string, Timer > & get_timers() const
microphone::MicrophoneSource * mic_source_
micro_wake_word::MicroWakeWord * micro_wake_word_
void set_noise_suppression_level(uint8_t noise_suppression_level)
Trigger< std::string > * get_tts_start_trigger() const
void on_set_configuration(const std::vector< std::string > &active_wake_words)
bool state
Definition fan.h:0
VoiceAssistant * global_voice_assistant
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::string str_sprintf(const char *fmt,...)
Definition helpers.cpp:323
std::vector< WakeWord > available_wake_words
std::vector< std::string > active_wake_words
std::vector< std::string > trained_languages
uint16_t x
Definition tt21100.cpp:5