ESPHome: esphome/components/voice_assistant/voice_assistant.cpp Source File

#include "voice_assistant.h"

#include "esphome/core/defines.h"


#ifdef USE_VOICE_ASSISTANT


#include "esphome/core/log.h"


#include <cinttypes>

#include <cstdio>


namespace esphome {


namespace voice_assistant {


// Log tag passed to the ESP_LOG* macros throughout this file.
static const char *const TAG = "voice_assistant";

// Drop any preprocessor macro named SAMPLE_RATE_HZ so the name can be declared as a constant below.
#ifdef SAMPLE_RATE_HZ
#undef SAMPLE_RATE_HZ
#endif

// Sample rate, in Hz, from which the buffer sizes below are derived.
static const size_t SAMPLE_RATE_HZ = 16000;

// Microphone ring buffer capacity in samples: 512 ms at 16 kHz = 8192 samples.
static const size_t RING_BUFFER_SAMPLES = 512 * SAMPLE_RATE_HZ / 1000;  // 512 ms * 16 kHz / 1000 ms
// Microphone ring buffer capacity in bytes (one int16_t per sample).
static const size_t RING_BUFFER_SIZE = RING_BUFFER_SAMPLES * sizeof(int16_t);
// Samples per outgoing audio chunk: 32 ms at 16 kHz = 512 samples.
static const size_t SEND_BUFFER_SAMPLES = 32 * SAMPLE_RATE_HZ / 1000;  // 32 ms * 16 kHz / 1000 ms
// Bytes per outgoing audio chunk; loop() only sends once this many bytes are available.
static const size_t SEND_BUFFER_SIZE = SEND_BUFFER_SAMPLES * sizeof(int16_t);
// Maximum number of bytes requested from the UDP socket per read in loop().
static const size_t RECEIVE_SIZE = 1024;
// Size in bytes of the buffer that stages received audio before it is handed to the speaker.
static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;


// Publishes the newly constructed instance through the global_voice_assistant pointer.
VoiceAssistant::VoiceAssistant() { global_voice_assistant = this; }


// Registers a data callback on the microphone source that copies every incoming chunk of bytes
// into the ring buffer, provided a ring buffer is currently allocated.
void VoiceAssistant::setup() {
  this->mic_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
    // Hold a local reference before testing, so the buffer stays alive for the duration of the write.
    // NOTE(review): presumably guards against deallocate_buffers_() resetting the member while this
    // callback runs in another context - confirm.
    std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_;
    // With the local copy held, a count above 1 means the member still refers to a buffer;
    // an unallocated (empty) pointer reports 0 and the data is dropped.
    if (this->ring_buffer_.use_count() > 1) {
      temp_ring_buffer->write((void *) data.data(), data.size());
    }
  });
}


// Reports setup_priority::AFTER_CONNECTION as this component's setup priority.
float VoiceAssistant::get_setup_priority() const { return setup_priority::AFTER_CONNECTION; }


// Creates the non-blocking UDP socket used for audio streaming.
//
// When a speaker is configured, the socket is additionally bound to port 6055 on the wildcard
// address. Failing to set SO_REUSEADDR is only a warning; every other failure marks the
// component as failed.
//
// Returns true when the socket is ready (udp_socket_running_ is set), false otherwise.
bool VoiceAssistant::start_udp_socket_() {
  this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
  if (this->socket_ == nullptr) {
    ESP_LOGE(TAG, "Could not create socket");
    this->mark_failed();
    return false;
  }

  int enable = 1;
  int err = this->socket_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
  if (err != 0) {
    // Report errno, not the call's return value, so the log matches its "errno" label.
    ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", errno);
    // we can still continue
  }

  err = this->socket_->setblocking(false);
  if (err != 0) {
    ESP_LOGE(TAG, "Socket unable to set nonblocking mode: errno %d", errno);
    this->mark_failed();
    return false;
  }

#ifdef USE_SPEAKER
  if (this->speaker_ != nullptr) {
    struct sockaddr_storage server;

    // sl is the length of the address actually written into `server` (0 on failure).
    socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), 6055);
    if (sl == 0) {
      ESP_LOGE(TAG, "Socket unable to set sockaddr: errno %d", errno);
      this->mark_failed();
      return false;
    }

    // Bind with the real address length rather than the size of the oversized storage struct.
    err = this->socket_->bind((struct sockaddr *) &server, sl);
    if (err != 0) {
      ESP_LOGE(TAG, "Socket unable to bind: errno %d", errno);
      this->mark_failed();
      return false;
    }
  }
#endif

  this->udp_socket_running_ = true;
  return true;
}


bool VoiceAssistant::allocate_buffers_() {

#ifdef USE_SPEAKER

  if ((this->speaker_ != nullptr) && (this->speaker_buffer_ == nullptr)) {

    ExternalRAMAllocator<uint8_t> speaker_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);

    this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE);

    if (this->speaker_buffer_ == nullptr) {

      ESP_LOGW(TAG, "Could not allocate speaker buffer");

      return false;

    }

  }

#endif


  if (this->ring_buffer_.use_count() == 0) {

    this->ring_buffer_ = RingBuffer::create(RING_BUFFER_SIZE);

    if (this->ring_buffer_.use_count() == 0) {

      ESP_LOGE(TAG, "Could not allocate ring buffer");

      return false;

    }

  }


  if (this->send_buffer_ == nullptr) {

    ExternalRAMAllocator<uint8_t> send_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);

    this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE);

    if (send_buffer_ == nullptr) {

      ESP_LOGW(TAG, "Could not allocate send buffer");

      return false;

    }

  }


  return true;

}


// Wipes the contents of every buffer that is currently allocated without freeing any of them,
// and rewinds the speaker bookkeeping counters.
void VoiceAssistant::clear_buffers_() {
  if (this->send_buffer_ != nullptr)
    memset(this->send_buffer_, 0, SEND_BUFFER_SIZE);

  if (this->ring_buffer_)
    this->ring_buffer_->reset();

#ifdef USE_SPEAKER
  const bool have_speaker_buffer = (this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr);
  if (have_speaker_buffer) {
    memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);

    // Nothing buffered, nothing received: start again from the beginning of the buffer.
    this->speaker_bytes_received_ = 0;
    this->speaker_buffer_index_ = 0;
    this->speaker_buffer_size_ = 0;
  }
#endif
}


void VoiceAssistant::deallocate_buffers_() {

  if (this->send_buffer_ != nullptr) {

    ExternalRAMAllocator<uint8_t> send_deallocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);

    send_deallocator.deallocate(this->send_buffer_, SEND_BUFFER_SIZE);

    this->send_buffer_ = nullptr;

  }


  if (this->ring_buffer_.use_count() > 0) {

    this->ring_buffer_.reset();

  }


#ifdef USE_SPEAKER

  if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {

    ExternalRAMAllocator<uint8_t> speaker_deallocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);

    speaker_deallocator.deallocate(this->speaker_buffer_, SPEAKER_BUFFER_SIZE);

    this->speaker_buffer_ = nullptr;

  }

#endif

}


// Forgets the stored conversation ID, so the next pipeline request is sent with an empty one.
void VoiceAssistant::reset_conversation_id() {
  this->conversation_id_.clear();
  ESP_LOGD(TAG, "reset conversation ID");
}


// Drives the voice assistant state machine; each call handles the current value of state_.
//
// A lost API client is handled first. After that each state either performs its work and moves
// on (usually to desired_state_), or simply waits for an event handler elsewhere in this file to
// change the state.
void VoiceAssistant::loop() {
  // The API client disappeared while a session was in progress: abort the session. The states
  // excluded here are IDLE and the two microphone-stopping states.
  if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
      this->state_ != State::STOPPING_MICROPHONE) {
    if (this->mic_source_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
      // The microphone is (or is about to be) running, so stop it before going idle.
      this->set_state_(State::STOP_MICROPHONE, State::IDLE);
    } else {
      this->set_state_(State::IDLE, State::IDLE);
    }
    this->continuous_ = false;
    // With no client attached, signal_stop_() only clears the stored destination address.
    this->signal_stop_();
    this->clear_buffers_();
    return;
  }
  switch (this->state_) {
    case State::IDLE: {
      if (this->continuous_ && this->desired_state_ == State::IDLE) {
        // Continuous mode: fire the idle trigger and immediately start the next run.
        this->idle_trigger_->trigger();
        this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
      } else {
        // Nothing to do while idle, so release the audio buffers.
        this->deallocate_buffers_();
      }
      break;
    }
    case State::START_MICROPHONE: {
      ESP_LOGD(TAG, "Starting Microphone");
      if (!this->allocate_buffers_()) {
        // The state is left unchanged, so allocation is attempted again on the next call.
        this->status_set_error("Failed to allocate buffers");
        return;
      }
      if (this->status_has_error()) {
        this->status_clear_error();
      }
      this->clear_buffers_();

      this->mic_source_->start();
      this->set_state_(State::STARTING_MICROPHONE);
      break;
    }
    case State::STARTING_MICROPHONE: {
      // Wait until the microphone reports that it is running.
      if (this->mic_source_->is_running()) {
        this->set_state_(this->desired_state_);
      }
      break;
    }
    case State::START_PIPELINE: {
      ESP_LOGD(TAG, "Requesting start...");
      uint32_t flags = 0;
      // The wake word flag is not set when continuing an existing conversation.
      if (!this->continue_conversation_ && this->use_wake_word_)
        flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD;
      if (this->silence_detection_)
        flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD;
      api::VoiceAssistantAudioSettings audio_settings;
      audio_settings.noise_suppression_level = this->noise_suppression_level_;
      audio_settings.auto_gain = this->auto_gain_;
      audio_settings.volume_multiplier = this->volume_multiplier_;

      api::VoiceAssistantRequest msg;
      msg.start = true;
      msg.conversation_id = this->conversation_id_;
      msg.flags = flags;
      msg.audio_settings = audio_settings;
      msg.wake_word_phrase = this->wake_word_;
      // The wake word phrase is sent once and then cleared.
      this->wake_word_ = "";

      if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) {
        ESP_LOGW(TAG, "Could not request start");
        this->error_trigger_->trigger("not-connected", "Could not request start");
        this->continuous_ = false;
        this->set_state_(State::IDLE, State::IDLE);
        break;
      }
      this->set_state_(State::STARTING_PIPELINE);
      // (Re)arm the timeout that clears the conversation ID after conversation_timeout_.
      this->set_timeout("reset-conversation_id", this->conversation_timeout_,
                        [this]() { this->reset_conversation_id(); });
      break;
    }
    case State::STARTING_PIPELINE: {
      break;  // State changed when udp server port received
    }
    case State::STREAMING_MICROPHONE: {
      // Drain the ring buffer in SEND_BUFFER_SIZE chunks for as long as a full chunk is available.
      size_t available = this->ring_buffer_->available();
      while (available >= SEND_BUFFER_SIZE) {
        size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
        if (this->audio_mode_ == AUDIO_MODE_API) {
          api::VoiceAssistantAudio msg;
          msg.data.assign((char *) this->send_buffer_, read_bytes);
          this->api_client_->send_voice_assistant_audio(msg);
        } else {
          // UDP mode: open the socket on first use; give up on this run if that fails.
          if (!this->udp_socket_running_) {
            if (!this->start_udp_socket_()) {
              this->set_state_(State::STOP_MICROPHONE, State::IDLE);
              break;
            }
          }
          this->socket_->sendto(this->send_buffer_, read_bytes, 0, (struct sockaddr *) &this->dest_addr_,
                                sizeof(this->dest_addr_));
        }
        available = this->ring_buffer_->available();
      }

      break;
    }
    case State::STOP_MICROPHONE: {
      if (this->mic_source_->is_running()) {
        this->mic_source_->stop();
        this->set_state_(State::STOPPING_MICROPHONE);
      } else {
        // Already stopped: go straight to the desired state.
        this->set_state_(this->desired_state_);
      }
      break;
    }
    case State::STOPPING_MICROPHONE: {
      // Wait until the microphone reports that it has stopped.
      if (this->mic_source_->is_stopped()) {
        this->set_state_(this->desired_state_);
      }
      break;
    }
    case State::AWAITING_RESPONSE: {
      break;  // State changed by events
    }
    case State::STREAMING_RESPONSE: {
      bool playing = false;
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        ssize_t received_len = 0;
        if (this->audio_mode_ == AUDIO_MODE_UDP) {
          // Only read from the socket while a full RECEIVE_SIZE chunk still fits in the buffer.
          if (this->speaker_buffer_index_ + RECEIVE_SIZE < SPEAKER_BUFFER_SIZE) {
            received_len = this->socket_->read(this->speaker_buffer_ + this->speaker_buffer_index_, RECEIVE_SIZE);
            if (received_len > 0) {
              this->speaker_buffer_index_ += received_len;
              this->speaker_buffer_size_ += received_len;
              this->speaker_bytes_received_ += received_len;
            }
          } else {
            ESP_LOGD(TAG, "Receive buffer full");
          }
        }
        // Build a small buffer of audio before sending to the speaker
        // The stream counts as finished once the stream-end event arrived and, in UDP mode, the
        // socket read also returned a negative value.
        bool end_of_stream = this->stream_ended_ && (this->audio_mode_ == AUDIO_MODE_API || received_len < 0);
        if (this->speaker_bytes_received_ > RECEIVE_SIZE * 4 || end_of_stream)
          this->write_speaker_();
        if (this->wait_for_stream_end_) {
          this->cancel_timeout("playing");
          if (end_of_stream) {
            ESP_LOGD(TAG, "End of audio stream received");
            this->cancel_timeout("speaker-timeout");
            this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);
          }
          break;  // We don't want to timeout here as the STREAM_END event will take care of that.
        }
        playing = this->speaker_->is_running();
      }
#endif
#ifdef USE_MEDIA_PLAYER
      if (this->media_player_ != nullptr) {
        playing = (this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING);

        if (playing && this->media_player_wait_for_announcement_start_) {
          // Announcement has started playing, wait for it to finish
          this->media_player_wait_for_announcement_start_ = false;
          this->media_player_wait_for_announcement_end_ = true;
        }

        if (!playing && this->media_player_wait_for_announcement_end_) {
          // Announcement has finished playing
          this->media_player_wait_for_announcement_end_ = false;
          this->cancel_timeout("playing");
          ESP_LOGD(TAG, "Announcement finished playing");
          this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);

          // Tell the API client that the announcement is done.
          api::VoiceAssistantAnnounceFinished msg;
          msg.success = true;
          this->api_client_->send_voice_assistant_announce_finished(msg);
          break;
        }
      }
#endif
      // While audio is playing, keep pushing the "playing" timeout back.
      if (playing) {
        this->start_playback_timeout_();
      }
      break;
    }
    case State::RESPONSE_FINISHED: {
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        // First flush whatever is still staged in the speaker buffer, one chunk per call.
        if (this->speaker_buffer_size_ > 0) {
          this->write_speaker_();
          break;
        }
        // Then wait for the speaker itself to finish.
        if (this->speaker_->has_buffered_data() || this->speaker_->is_running()) {
          break;
        }
        ESP_LOGD(TAG, "Speaker has finished outputting all audio");
        this->speaker_->stop();
        this->cancel_timeout("speaker-timeout");
        this->cancel_timeout("playing");

        this->clear_buffers_();

        this->wait_for_stream_end_ = false;
        this->stream_ended_ = false;

        this->tts_stream_end_trigger_->trigger();
      }
#endif
      // Either listen again for a follow-up or return to idle.
      if (this->continue_conversation_) {
        this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
      } else {
        this->set_state_(State::IDLE, State::IDLE);
      }
      break;
    }
    default:
      break;
  }
}


#ifdef USE_SPEAKER


// Hands at most 4 KiB of staged audio to the speaker and compacts the staging buffer by the
// number of bytes the speaker accepted. Each successful write (re)arms a 5 s "speaker-timeout"
// that stops the speaker.
void VoiceAssistant::write_speaker_() {
  if ((this->speaker_ == nullptr) || (this->speaker_buffer_ == nullptr)) {
    return;
  }
  const bool has_pending_audio = this->speaker_buffer_size_ > 0;
  if (!has_pending_audio) {
    return;
  }

  size_t chunk_len = std::min<size_t>(this->speaker_buffer_size_, 4 * 1024);
  size_t accepted = this->speaker_->play(this->speaker_buffer_, chunk_len);
  if (accepted == 0) {
    ESP_LOGV(TAG, "Speaker buffer full, trying again next loop");
    return;
  }

  // Slide the unplayed remainder to the front of the buffer and adjust the bookkeeping.
  memmove(this->speaker_buffer_, this->speaker_buffer_ + accepted, this->speaker_buffer_size_ - accepted);
  this->speaker_buffer_size_ -= accepted;
  this->speaker_buffer_index_ -= accepted;
  this->set_timeout("speaker-timeout", 5000, [this]() { this->speaker_->stop(); });
}


#endif


// Attaches or detaches the API client that drives the voice assistant.
//
// client:    the connection asking to subscribe or unsubscribe.
// subscribe: true to attach `client`, false to detach it.
//
// Only one client may be attached at a time, and only the attached client may detach itself;
// any other request is logged as an error and ignored.
void VoiceAssistant::client_subscription(api::APIConnection *client, bool subscribe) {
  if (subscribe) {
    if (this->api_client_ != nullptr) {
      ESP_LOGE(TAG, "Multiple API Clients attempting to connect to Voice Assistant");
      ESP_LOGE(TAG, "Current client: %s", this->api_client_->get_client_combined_info().c_str());
      ESP_LOGE(TAG, "New client: %s", client->get_client_combined_info().c_str());
      return;
    }

    this->api_client_ = client;
    this->client_connected_trigger_->trigger();
    return;
  }

  const bool is_current_client = (this->api_client_ != nullptr) && (client == this->api_client_);
  if (!is_current_client) {
    ESP_LOGE(TAG, "Client attempting to unsubscribe that is not the current API Client");
    return;
  }

  this->api_client_ = nullptr;
  this->client_disconnected_trigger_->trigger();
}


// Returns the log-string name of `state`, or "UNKNOWN" for a value without a case below.
static const LogString *voice_assistant_state_to_string(State state) {
  const LogString *label = LOG_STR("UNKNOWN");
  switch (state) {
    case State::IDLE:
      label = LOG_STR("IDLE");
      break;
    case State::START_MICROPHONE:
      label = LOG_STR("START_MICROPHONE");
      break;
    case State::STARTING_MICROPHONE:
      label = LOG_STR("STARTING_MICROPHONE");
      break;
    case State::WAIT_FOR_VAD:
      label = LOG_STR("WAIT_FOR_VAD");
      break;
    case State::WAITING_FOR_VAD:
      label = LOG_STR("WAITING_FOR_VAD");
      break;
    case State::START_PIPELINE:
      label = LOG_STR("START_PIPELINE");
      break;
    case State::STARTING_PIPELINE:
      label = LOG_STR("STARTING_PIPELINE");
      break;
    case State::STREAMING_MICROPHONE:
      label = LOG_STR("STREAMING_MICROPHONE");
      break;
    case State::STOP_MICROPHONE:
      label = LOG_STR("STOP_MICROPHONE");
      break;
    case State::STOPPING_MICROPHONE:
      label = LOG_STR("STOPPING_MICROPHONE");
      break;
    case State::AWAITING_RESPONSE:
      label = LOG_STR("AWAITING_RESPONSE");
      break;
    case State::STREAMING_RESPONSE:
      label = LOG_STR("STREAMING_RESPONSE");
      break;
    case State::RESPONSE_FINISHED:
      label = LOG_STR("RESPONSE_FINISHED");
      break;
    default:
      break;
  }
  return label;
}


// Switches to `state` and logs the transition; desired_state_ is left untouched.
void VoiceAssistant::set_state_(State state) {
  const State previous = this->state_;
  this->state_ = state;
  ESP_LOGD(TAG, "State changed from %s to %s", LOG_STR_ARG(voice_assistant_state_to_string(previous)),
           LOG_STR_ARG(voice_assistant_state_to_string(state)));
}


// Switches to `state` (logging the transition) and records `desired_state` as the state that
// loop() moves to once the current transitional state completes.
void VoiceAssistant::set_state_(State state, State desired_state) {
  this->set_state_(state);
  this->desired_state_ = desired_state;
  ESP_LOGD(TAG, "Desired state set to %s", LOG_STR_ARG(voice_assistant_state_to_string(desired_state)));
}


// Reports a start failure: logs it, fires the error trigger with code "failed-to-start", and
// schedules the microphone to stop with IDLE as the follow-up state.
void VoiceAssistant::failed_to_start() {
  ESP_LOGE(TAG, "Failed to start server. See Home Assistant logs for more details.");
  this->error_trigger_->trigger("failed-to-start", "Failed to start server. See Home Assistant logs for more details.");
  this->set_state_(State::STOP_MICROPHONE, State::IDLE);
}


void VoiceAssistant::start_streaming() {

  if (this->state_ != State::STARTING_PIPELINE) {

    this->signal_stop_();

    return;

  }


  ESP_LOGD(TAG, "Client started, streaming microphone");

  this->audio_mode_ = AUDIO_MODE_API;


  if (this->mic_source_->is_running()) {

    this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);

  } else {

    this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);

  }

}


void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t port) {

  if (this->state_ != State::STARTING_PIPELINE) {

    this->signal_stop_();

    return;

  }


  ESP_LOGD(TAG, "Client started, streaming microphone");

  this->audio_mode_ = AUDIO_MODE_UDP;


  memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));

  if (this->dest_addr_.ss_family == AF_INET) {

    ((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);

  }

#if LWIP_IPV6

  else if (this->dest_addr_.ss_family == AF_INET6) {

    ((struct sockaddr_in6 *) &this->dest_addr_)->sin6_port = htons(port);

  }

#endif

  else {

    ESP_LOGW(TAG, "Unknown address family: %d", this->dest_addr_.ss_family);

    return;

  }


  if (this->mic_source_->is_running()) {

    this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);

  } else {

    this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);

  }

}


// Asks the assistant to begin a run.
//
// continuous:        stored in continuous_; loop() starts a new run from IDLE while it is set.
// silence_detection: stored in silence_detection_; loop() requests VAD from the server when set.
//
// Without an API client the assistant is forced to IDLE and continuous mode is switched off.
// A request made while a run is already in progress (state other than IDLE) is ignored.
void VoiceAssistant::request_start(bool continuous, bool silence_detection) {
  if (this->api_client_ == nullptr) {
    ESP_LOGE(TAG, "No API client connected");
    this->set_state_(State::IDLE, State::IDLE);
    this->continuous_ = false;
    return;
  }

  if (this->state_ != State::IDLE) {
    return;
  }

  this->continuous_ = continuous;
  this->silence_detection_ = silence_detection;
  this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
}


// Asks the assistant to stop. Continuous mode and conversation continuation are always switched
// off; what else happens depends on the current state.
void VoiceAssistant::request_stop() {
  this->continuous_ = false;
  this->continue_conversation_ = false;

  switch (this->state_) {
    case State::IDLE:
      break;
    // The pipeline has not been requested from the client yet: stopping the microphone suffices.
    case State::START_MICROPHONE:
    case State::STARTING_MICROPHONE:
    case State::WAIT_FOR_VAD:
    case State::WAITING_FOR_VAD:
    case State::START_PIPELINE:
      this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      break;
    // The pipeline was requested: send a stop request to the client as well.
    case State::STARTING_PIPELINE:
    case State::STREAMING_MICROPHONE:
      this->signal_stop_();
      this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      break;
    // Already stopping: just make sure it ends up in IDLE.
    case State::STOP_MICROPHONE:
    case State::STOPPING_MICROPHONE:
      this->desired_state_ = State::IDLE;
      break;
    case State::AWAITING_RESPONSE:
      this->signal_stop_();
      break;
    case State::STREAMING_RESPONSE:
#ifdef USE_MEDIA_PLAYER
      // Stop any ongoing media player announcement
      if (this->media_player_ != nullptr) {
        this->media_player_->make_call()
            .set_command(media_player::MEDIA_PLAYER_COMMAND_STOP)
            .set_announcement(true)
            .perform();
      }
#endif
      break;
    case State::RESPONSE_FINISHED:
      break;  // Let the incoming audio stream finish then it will go to idle.
  }
}


// Clears the stored UDP destination address and, when an API client is attached, sends it a
// VoiceAssistantRequest with start == false.
void VoiceAssistant::signal_stop_() {
  // The destination is zeroed even when there is no client to notify.
  memset(&this->dest_addr_, 0, sizeof(this->dest_addr_));
  if (this->api_client_ == nullptr) {
    return;
  }
  ESP_LOGD(TAG, "Signaling stop...");
  api::VoiceAssistantRequest msg;
  msg.start = false;
  this->api_client_->send_voice_assistant_request(msg);
}


// (Re)arms the 2 s "playing" timeout. When it fires, the "speaker-timeout" is cancelled, the
// state machine moves to RESPONSE_FINISHED, and the API client (if still attached) is told
// that the announcement finished.
void VoiceAssistant::start_playback_timeout_() {
  this->set_timeout("playing", 2000, [this]() {
    this->cancel_timeout("speaker-timeout");
    this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);

    // The client may have unsubscribed while the timeout was pending; client_subscription()
    // nulls api_client_ and nothing cancels this timeout on disconnect.
    if (this->api_client_ == nullptr) {
      return;
    }

    api::VoiceAssistantAnnounceFinished msg;
    msg.success = true;
    this->api_client_->send_voice_assistant_announce_finished(msg);
  });
}


// Handles a pipeline event received from the API client.
//
// msg: the event; msg.event_type selects the handling and msg.data carries name/value arguments.
//
// User-facing triggers are fired through defer() rather than inline. Event arguments are read
// by const reference and copied only when their name matches; when a name occurs more than
// once, the last occurrence wins.
void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
  ESP_LOGD(TAG, "Event Type: %" PRId32, msg.event_type);
  switch (msg.event_type) {
    case api::enums::VOICE_ASSISTANT_RUN_START:
      ESP_LOGD(TAG, "Assist Pipeline running");
      this->defer([this]() { this->start_trigger_->trigger(); });
      break;
    case api::enums::VOICE_ASSISTANT_WAKE_WORD_START:
      break;
    case api::enums::VOICE_ASSISTANT_WAKE_WORD_END: {
      ESP_LOGD(TAG, "Wake word detected");
      this->defer([this]() { this->wake_word_detected_trigger_->trigger(); });
      break;
    }
    case api::enums::VOICE_ASSISTANT_STT_START:
      ESP_LOGD(TAG, "STT started");
      this->defer([this]() { this->listening_trigger_->trigger(); });
      break;
    case api::enums::VOICE_ASSISTANT_STT_END: {
      std::string text;
      for (const auto &arg : msg.data) {
        if (arg.name == "text") {
          text = arg.value;
        }
      }
      if (text.empty()) {
        ESP_LOGW(TAG, "No text in STT_END event");
        return;
      }
      ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
      this->defer([this, text]() { this->stt_end_trigger_->trigger(text); });
      break;
    }
    case api::enums::VOICE_ASSISTANT_INTENT_START:
      ESP_LOGD(TAG, "Intent started");
      this->defer([this]() { this->intent_start_trigger_->trigger(); });
      break;
    case api::enums::VOICE_ASSISTANT_INTENT_END: {
      for (const auto &arg : msg.data) {
        if (arg.name == "conversation_id") {
          this->conversation_id_ = arg.value;
        } else if (arg.name == "continue_conversation") {
          this->continue_conversation_ = (arg.value == "1");
        }
      }
      this->defer([this]() { this->intent_end_trigger_->trigger(); });
      break;
    }
    case api::enums::VOICE_ASSISTANT_TTS_START: {
      std::string text;
      for (const auto &arg : msg.data) {
        if (arg.name == "text") {
          text = arg.value;
        }
      }
      if (text.empty()) {
        ESP_LOGW(TAG, "No text in TTS_START event");
        return;
      }
      ESP_LOGD(TAG, "Response: \"%s\"", text.c_str());
      this->defer([this, text]() {
        this->tts_start_trigger_->trigger(text);
#ifdef USE_SPEAKER
        if (this->speaker_ != nullptr) {
          this->speaker_->start();
        }
#endif
      });
      break;
    }
    case api::enums::VOICE_ASSISTANT_TTS_END: {
      std::string url;
      for (const auto &arg : msg.data) {
        if (arg.name == "url") {
          url = arg.value;
        }
      }
      if (url.empty()) {
        ESP_LOGW(TAG, "No url in TTS_END event");
        return;
      }
      ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
      this->defer([this, url]() {
#ifdef USE_MEDIA_PLAYER
        if (this->media_player_ != nullptr) {
          this->media_player_->make_call().set_media_url(url).set_announcement(true).perform();

          this->media_player_wait_for_announcement_start_ = true;
          this->media_player_wait_for_announcement_end_ = false;
          // Start the playback timeout, as the media player state isn't immediately updated
          this->start_playback_timeout_();
        }
#endif
        this->tts_end_trigger_->trigger(url);
      });
      // The state change happens immediately, before the deferred callback above runs.
      State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
      this->set_state_(new_state, new_state);
      break;
    }
    case api::enums::VOICE_ASSISTANT_RUN_END: {
      ESP_LOGD(TAG, "Assist Pipeline ended");
      if ((this->state_ == State::START_PIPELINE) || (this->state_ == State::STARTING_PIPELINE) ||
          (this->state_ == State::STREAMING_MICROPHONE)) {
        // Microphone is running, stop it
        this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      } else if (this->state_ == State::AWAITING_RESPONSE) {
        // No TTS start event ("nevermind")
        this->set_state_(State::IDLE, State::IDLE);
      }
      this->defer([this]() { this->end_trigger_->trigger(); });
      break;
    }
    case api::enums::VOICE_ASSISTANT_ERROR: {
      std::string code = "";
      std::string message = "";
      for (const auto &arg : msg.data) {
        if (arg.name == "code") {
          code = arg.value;
        } else if (arg.name == "message") {
          message = arg.value;
        }
      }
      if (code == "wake-word-timeout" || code == "wake_word_detection_aborted" || code == "no_wake_word") {
        // Don't change state here since either the "tts-end" or "run-end" events will do it.
        return;
      } else if (code == "wake-provider-missing" || code == "wake-engine-missing") {
        // Wake word is not set up or not ready on Home Assistant so stop and do not retry until user starts again.
        this->defer([this, code, message]() {
          this->request_stop();
          this->error_trigger_->trigger(code, message);
        });
        return;
      }
      ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());
      if (this->state_ != State::IDLE) {
        this->signal_stop_();
        this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      }
      this->defer([this, code, message]() { this->error_trigger_->trigger(code, message); });
      break;
    }
    case api::enums::VOICE_ASSISTANT_TTS_STREAM_START: {
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        // From here on loop() waits for the stream-end event instead of the "playing" timeout.
        this->wait_for_stream_end_ = true;
        ESP_LOGD(TAG, "TTS stream start");
        this->defer([this] { this->tts_stream_start_trigger_->trigger(); });
      }
#endif
      break;
    }
    case api::enums::VOICE_ASSISTANT_TTS_STREAM_END: {
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        this->stream_ended_ = true;
        ESP_LOGD(TAG, "TTS stream end");
      }
#endif
      break;
    }
    case api::enums::VOICE_ASSISTANT_STT_VAD_START:
      ESP_LOGD(TAG, "Starting STT by VAD");
      this->defer([this]() { this->stt_vad_start_trigger_->trigger(); });
      break;
    case api::enums::VOICE_ASSISTANT_STT_VAD_END:
      ESP_LOGD(TAG, "STT by VAD end");
      this->set_state_(State::STOP_MICROPHONE, State::AWAITING_RESPONSE);
      this->defer([this]() { this->stt_vad_end_trigger_->trigger(); });
      break;
    default:
      ESP_LOGD(TAG, "Unhandled event type: %" PRId32, msg.event_type);
      break;
  }
}


// Appends an audio chunk received over the API to the speaker staging buffer.
//
// msg: the audio message; msg.data holds the raw bytes.
//
// The chunk is dropped with an error log when it would not fit in the remaining space.
void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) {
#ifdef USE_SPEAKER  // We should never get to this function if there is no speaker anyway
  if ((this->speaker_ == nullptr) || (this->speaker_buffer_ == nullptr)) {
    return;
  }

  const size_t chunk_len = msg.data.length();
  if (this->speaker_buffer_index_ + chunk_len >= SPEAKER_BUFFER_SIZE) {
    ESP_LOGE(TAG, "Cannot receive audio, buffer is full");
    return;
  }

  memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data.data(), chunk_len);
  this->speaker_buffer_index_ += chunk_len;
  this->speaker_buffer_size_ += chunk_len;
  this->speaker_bytes_received_ += chunk_len;
  ESP_LOGV(TAG, "Received audio: %u bytes from API", chunk_len);
#endif
}


// Handles a timer event from the API client: records the timer, fires the trigger matching the
// event type, and starts or stops the once-per-second tick depending on whether timers remain.
void VoiceAssistant::on_timer_event(const api::VoiceAssistantTimerEventResponse &msg) {
  Timer timer = {
      .id = msg.timer_id,
      .name = msg.name,
      .total_seconds = msg.total_seconds,
      .seconds_left = msg.seconds_left,
      .is_active = msg.is_active,
  };
  // Insert or overwrite the entry for this timer ID, regardless of the event type.
  this->timers_[timer.id] = timer;
  ESP_LOGD(TAG, "Timer Event");
  ESP_LOGD(TAG, "  Type: %" PRId32, msg.event_type);
  ESP_LOGD(TAG, "  %s", timer.to_string().c_str());

  switch (msg.event_type) {
    case api::enums::VOICE_ASSISTANT_TIMER_STARTED:
      this->timer_started_trigger_->trigger(timer);
      break;
    case api::enums::VOICE_ASSISTANT_TIMER_UPDATED:
      this->timer_updated_trigger_->trigger(timer);
      break;
    // Cancelled and finished timers are removed again after their trigger has fired.
    case api::enums::VOICE_ASSISTANT_TIMER_CANCELLED:
      this->timer_cancelled_trigger_->trigger(timer);
      this->timers_.erase(timer.id);
      break;
    case api::enums::VOICE_ASSISTANT_TIMER_FINISHED:
      this->timer_finished_trigger_->trigger(timer);
      this->timers_.erase(timer.id);
      break;
  }

  // Run the 1000 ms "timer-event" interval only while at least one timer is tracked.
  if (this->timers_.empty()) {
    this->cancel_interval("timer-event");
    this->timer_tick_running_ = false;
  } else if (!this->timer_tick_running_) {
    this->set_interval("timer-event", 1000, [this]() { this->timer_tick_(); });
    this->timer_tick_running_ = true;
  }
}


void VoiceAssistant::timer_tick_() {

  std::vector<Timer> res;

  res.reserve(this->timers_.size());

  for (auto &pair : this->timers_) {

    auto &timer = pair.second;

    if (timer.is_active && timer.seconds_left > 0) {

      timer.seconds_left--;

    }

    res.push_back(timer);

  }

  this->timer_tick_trigger_->trigger(res);

}


// Handles an announce request from the API client by playing it through the media player.
// Does nothing when media player support is compiled out or no media player is configured.
void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) {
#ifdef USE_MEDIA_PLAYER
  if (this->media_player_ != nullptr) {
    this->tts_start_trigger_->trigger(msg.text);
    // Optional pre-announce sound, played as an announcement ahead of the main media.
    if (!msg.preannounce_media_id.empty()) {
      this->media_player_->make_call().set_media_url(msg.preannounce_media_id).set_announcement(true).perform();
    }
    // Enqueueing a URL with an empty playlist will still play the file immediately
    this->media_player_->make_call()
        .set_command(media_player::MEDIA_PLAYER_COMMAND_ENQUEUE)
        .set_media_url(msg.media_id)
        .set_announcement(true)
        .perform();
    // loop() reads this in RESPONSE_FINISHED to decide whether to start listening again.
    this->continue_conversation_ = msg.start_conversation;

    this->media_player_wait_for_announcement_start_ = true;
    this->media_player_wait_for_announcement_end_ = false;
    // Start the playback timeout, as the media player state isn't immediately updated
    this->start_playback_timeout_();

    // In continuous mode the microphone is stopped first, with STREAMING_RESPONSE as follow-up.
    if (this->continuous_) {
      this->set_state_(State::STOP_MICROPHONE, State::STREAMING_RESPONSE);
    } else {
      this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE);
    }

    this->tts_end_trigger_->trigger(msg.media_id);
    this->end_trigger_->trigger();
  }
#endif
}


// Applies a wake word selection: every microWakeWord model is disabled, then each model whose
// ID appears in `active_wake_words` is enabled again.
//
// active_wake_words: IDs of the wake word models that should be active.
//
// Does nothing when microWakeWord support is compiled out or not configured.
void VoiceAssistant::on_set_configuration(const std::vector<std::string> &active_wake_words) {
#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_) {
    // Disable all wake words first
    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      model->disable();
    }

    // Enable only active wake words (IDs are iterated by reference to avoid a string copy each).
    for (const auto &ww_id : active_wake_words) {
      for (auto &model : this->micro_wake_word_->get_wake_words()) {
        if (model->get_id() == ww_id) {
          model->enable();
          ESP_LOGD(TAG, "Enabled wake word: %s (id=%s)", model->get_wake_word().c_str(), model->get_id().c_str());
        }
      }
    }
  }
#endif
}


// Rebuilds config_ from the current wake word setup and returns a reference to it.
//
// Without microWakeWord both lists are empty and max_active_wake_words is 0. With it,
// max_active_wake_words is 1, every model is listed as available, and the enabled models are
// listed as active.
const Configuration &VoiceAssistant::get_configuration() {
  this->config_.available_wake_words.clear();
  this->config_.active_wake_words.clear();

  // Default for "no microWakeWord"; overwritten below when one is configured.
  this->config_.max_active_wake_words = 0;

#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_) {
    this->config_.max_active_wake_words = 1;

    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      if (model->is_enabled()) {
        this->config_.active_wake_words.push_back(model->get_id());
      }

      WakeWord entry;
      entry.id = model->get_id();
      entry.wake_word = model->get_wake_word();
      for (const auto &language : model->get_trained_languages()) {
        entry.trained_languages.push_back(language);
      }
      this->config_.available_wake_words.push_back(std::move(entry));
    }
  }
#endif

  return this->config_;
}


// Global pointer to the VoiceAssistant instance. Starts out null and is set to `this` by the
// VoiceAssistant constructor, so it refers to the most recently constructed instance.
VoiceAssistant *global_voice_assistant = nullptr;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)


}  // namespace voice_assistant


}  // namespace esphome


#endif  // USE_VOICE_ASSISTANT

esphome::Component::mark_failed
virtual void mark_failed()
Mark this component as failed.
Definition component.cpp:128

esphome::Component::set_interval
void set_interval(const std::string &name, uint32_t interval, std::function< void()> &&f)
Set an interval function with a unique name.
Definition component.cpp:55

esphome::Component::cancel_timeout
bool cancel_timeout(const std::string &name)
Cancel a timeout function.
Definition component.cpp:76

esphome::Component::status_clear_error
void status_clear_error()
Definition component.cpp:184

esphome::Component::status_has_error
bool status_has_error() const
Definition component.cpp:160

esphome::Component::cancel_interval
bool cancel_interval(const std::string &name)
Cancel an interval function.
Definition component.cpp:59

esphome::Component::status_set_error
void status_set_error(const char *message="unspecified")
Definition component.cpp:169

esphome::Component::defer
void defer(const std::string &name, std::function< void()> &&f)
Defer a callback to the next loop() call.
Definition component.cpp:140

esphome::Component::set_timeout
void set_timeout(const std::string &name, uint32_t timeout, std::function< void()> &&f)
Set a timeout function with a unique name.
Definition component.cpp:72

esphome::RAMAllocator
An STL allocator that uses SPI or internal RAM.
Definition helpers.h:683

esphome::RAMAllocator::deallocate
void deallocate(T *p, size_t n)
Definition helpers.h:741

esphome::RAMAllocator::allocate
T * allocate(size_t n)
Definition helpers.h:703

esphome::RingBuffer::create
static std::unique_ptr< RingBuffer > create(size_t len)
Definition ring_buffer.cpp:22

esphome::Trigger::trigger
void trigger(Ts... x)
Inform the parent automation that the event has triggered.
Definition automation.h:96

esphome::api::APIConnection
Definition api_connection.h:62

esphome::api::APIConnection::get_client_combined_info
std::string get_client_combined_info() const
Definition api_connection.h:424

esphome::api::APIServerConnectionBase::send_voice_assistant_audio
bool send_voice_assistant_audio(const VoiceAssistantAudio &msg)
Definition api_pb2_service.cpp:518

esphome::api::APIServerConnectionBase::send_voice_assistant_request
bool send_voice_assistant_request(const VoiceAssistantRequest &msg)
Definition api_pb2_service.cpp:506

esphome::api::APIServerConnectionBase::send_voice_assistant_announce_finished
bool send_voice_assistant_announce_finished(const VoiceAssistantAnnounceFinished &msg)
Definition api_pb2_service.cpp:530

esphome::api::VoiceAssistantAnnounceFinished
Definition api_pb2.h:2072

esphome::api::VoiceAssistantAnnounceFinished::success
bool success
Definition api_pb2.h:2074

esphome::api::VoiceAssistantAnnounceRequest
Definition api_pb2.h:2056

esphome::api::VoiceAssistantAnnounceRequest::start_conversation
bool start_conversation
Definition api_pb2.h:2061

esphome::api::VoiceAssistantAnnounceRequest::media_id
std::string media_id
Definition api_pb2.h:2058

esphome::api::VoiceAssistantAnnounceRequest::preannounce_media_id
std::string preannounce_media_id
Definition api_pb2.h:2060

esphome::api::VoiceAssistantAnnounceRequest::text
std::string text
Definition api_pb2.h:2059

esphome::api::VoiceAssistantAudio
Definition api_pb2.h:2024

esphome::api::VoiceAssistantAudio::data
std::string data
Definition api_pb2.h:2026

esphome::api::VoiceAssistantAudioSettings
Definition api_pb2.h:1952

esphome::api::VoiceAssistantAudioSettings::auto_gain
uint32_t auto_gain
Definition api_pb2.h:1955

esphome::api::VoiceAssistantAudioSettings::noise_suppression_level
uint32_t noise_suppression_level
Definition api_pb2.h:1954

esphome::api::VoiceAssistantAudioSettings::volume_multiplier
float volume_multiplier
Definition api_pb2.h:1956

esphome::api::VoiceAssistantEventResponse
Definition api_pb2.h:2010

esphome::api::VoiceAssistantEventResponse::event_type
enums::VoiceAssistantEvent event_type
Definition api_pb2.h:2012

esphome::api::VoiceAssistantEventResponse::data
std::vector< VoiceAssistantEventData > data
Definition api_pb2.h:2013

esphome::api::VoiceAssistantRequest
Definition api_pb2.h:1967

esphome::api::VoiceAssistantRequest::flags
uint32_t flags
Definition api_pb2.h:1971

esphome::api::VoiceAssistantRequest::start
bool start
Definition api_pb2.h:1969

esphome::api::VoiceAssistantRequest::wake_word_phrase
std::string wake_word_phrase
Definition api_pb2.h:1973

esphome::api::VoiceAssistantRequest::conversation_id
std::string conversation_id
Definition api_pb2.h:1970

esphome::api::VoiceAssistantRequest::audio_settings
VoiceAssistantAudioSettings audio_settings
Definition api_pb2.h:1972

esphome::api::VoiceAssistantTimerEventResponse
Definition api_pb2.h:2038

esphome::api::VoiceAssistantTimerEventResponse::name
std::string name
Definition api_pb2.h:2042

esphome::api::VoiceAssistantTimerEventResponse::event_type
enums::VoiceAssistantTimerEvent event_type
Definition api_pb2.h:2040

esphome::api::VoiceAssistantTimerEventResponse::timer_id
std::string timer_id
Definition api_pb2.h:2041

esphome::api::VoiceAssistantTimerEventResponse::total_seconds
uint32_t total_seconds
Definition api_pb2.h:2043

esphome::api::VoiceAssistantTimerEventResponse::seconds_left
uint32_t seconds_left
Definition api_pb2.h:2044

esphome::api::VoiceAssistantTimerEventResponse::is_active
bool is_active
Definition api_pb2.h:2045

esphome::media_player::MediaPlayerCall::set_media_url
MediaPlayerCall & set_media_url(const std::string &url)
Definition media_player.cpp:119

esphome::media_player::MediaPlayerCall::perform
void perform()
Definition media_player.cpp:73

esphome::media_player::MediaPlayerCall::set_announcement
MediaPlayerCall & set_announcement(bool announce)
Definition media_player.cpp:129

esphome::media_player::MediaPlayerCall::set_command
MediaPlayerCall & set_command(MediaPlayerCommand command)
Definition media_player.cpp:92

esphome::media_player::MediaPlayer::state
MediaPlayerState state
Definition media_player.h:95

esphome::media_player::MediaPlayer::make_call
MediaPlayerCall make_call()
Definition media_player.h:98

esphome::micro_wake_word::MicroWakeWord::get_wake_words
std::vector< WakeWordModel * > get_wake_words()
Definition micro_wake_word.cpp:210

esphome::microphone::MicrophoneSource::is_stopped
bool is_stopped() const
Definition microphone_source.h:64

esphome::microphone::MicrophoneSource::is_running
bool is_running() const
Definition microphone_source.h:63

esphome::microphone::MicrophoneSource::stop
void stop()
Definition microphone_source.cpp:39

esphome::microphone::MicrophoneSource::start
void start()
Definition microphone_source.cpp:32

esphome::microphone::MicrophoneSource::add_data_callback
void add_data_callback(std::function< void(const std::vector< uint8_t > &)> &&data_callback)
Definition microphone_source.cpp:9

esphome::speaker::Speaker::play
virtual size_t play(const uint8_t *data, size_t length)=0
Plays the provided audio data.

esphome::speaker::Speaker::is_running
bool is_running() const
Definition speaker.h:66

esphome::speaker::Speaker::has_buffered_data
virtual bool has_buffered_data() const =0

esphome::speaker::Speaker::start
virtual void start()=0

esphome::speaker::Speaker::stop
virtual void stop()=0

esphome::voice_assistant::VoiceAssistant
Definition voice_assistant.h:93

esphome::voice_assistant::VoiceAssistant::socket_
std::unique_ptr< socket::Socket > socket_
Definition voice_assistant.h:222

esphome::voice_assistant::VoiceAssistant::VoiceAssistant
VoiceAssistant()
Definition voice_assistant.cpp:29

esphome::voice_assistant::VoiceAssistant::timer_started_trigger_
Trigger< Timer > * timer_started_trigger_
Definition voice_assistant.h:250

esphome::voice_assistant::VoiceAssistant::get_configuration
const Configuration & get_configuration()
Definition voice_assistant.cpp:893

esphome::voice_assistant::VoiceAssistant::local_output_
bool local_output_
Definition voice_assistant.h:275

esphome::voice_assistant::VoiceAssistant::speaker_buffer_size_
size_t speaker_buffer_size_
Definition voice_assistant.h:264

esphome::voice_assistant::VoiceAssistant::start_udp_socket_
bool start_udp_socket_()
Definition voice_assistant.cpp:42

esphome::voice_assistant::VoiceAssistant::timers_
std::unordered_map< std::string, Timer > timers_
Definition voice_assistant.h:248

esphome::voice_assistant::VoiceAssistant::wait_for_stream_end_
bool wait_for_stream_end_
Definition voice_assistant.h:266

esphome::voice_assistant::VoiceAssistant::speaker_buffer_index_
size_t speaker_buffer_index_
Definition voice_assistant.h:263

esphome::voice_assistant::VoiceAssistant::udp_socket_running_
bool udp_socket_running_
Definition voice_assistant.h:300

esphome::voice_assistant::VoiceAssistant::wake_word_
std::string wake_word_
Definition voice_assistant.h:279

esphome::voice_assistant::VoiceAssistant::on_timer_event
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Definition voice_assistant.cpp:788

esphome::voice_assistant::VoiceAssistant::start_streaming
void start_streaming()
Definition voice_assistant.cpp:472

esphome::voice_assistant::VoiceAssistant::signal_stop_
void signal_stop_()
Definition voice_assistant.cpp:575

esphome::voice_assistant::VoiceAssistant::state_
State state_
Definition voice_assistant.h:296

esphome::voice_assistant::VoiceAssistant::on_audio
void on_audio(const api::VoiceAssistantAudio &msg)
Definition voice_assistant.cpp:772

esphome::voice_assistant::VoiceAssistant::loop
void loop() override
Definition voice_assistant.cpp:162

esphome::voice_assistant::VoiceAssistant::volume_multiplier_
float volume_multiplier_
Definition voice_assistant.h:286

esphome::voice_assistant::VoiceAssistant::media_player_
media_player::MediaPlayer * media_player_
Definition voice_assistant.h:270

esphome::voice_assistant::VoiceAssistant::use_wake_word_
bool use_wake_word_
Definition voice_assistant.h:283

esphome::voice_assistant::VoiceAssistant::get_setup_priority
float get_setup_priority() const override
Definition voice_assistant.cpp:40

esphome::voice_assistant::VoiceAssistant::stt_end_trigger_
Trigger< std::string > * stt_end_trigger_
Definition voice_assistant.h:237

esphome::voice_assistant::VoiceAssistant::audio_mode_
AudioMode audio_mode_
Definition voice_assistant.h:299

esphome::voice_assistant::VoiceAssistant::set_state_
void set_state_(State state)
Definition voice_assistant.cpp:453

esphome::voice_assistant::VoiceAssistant::timer_cancelled_trigger_
Trigger< Timer > * timer_cancelled_trigger_
Definition voice_assistant.h:253

esphome::voice_assistant::VoiceAssistant::error_trigger_
Trigger< std::string, std::string > * error_trigger_
Definition voice_assistant.h:240

esphome::voice_assistant::VoiceAssistant::speaker_buffer_
uint8_t * speaker_buffer_
Definition voice_assistant.h:262

esphome::voice_assistant::VoiceAssistant::client_subscription
void client_subscription(api::APIConnection *client, bool subscribe)
Definition voice_assistant.cpp:398

esphome::voice_assistant::VoiceAssistant::stt_vad_end_trigger_
Trigger * stt_vad_end_trigger_
Definition voice_assistant.h:231

esphome::voice_assistant::VoiceAssistant::deallocate_buffers_
void deallocate_buffers_()
Definition voice_assistant.cpp:137

esphome::voice_assistant::VoiceAssistant::silence_detection_
bool silence_detection_
Definition voice_assistant.h:292

esphome::voice_assistant::VoiceAssistant::timer_tick_trigger_
Trigger< std::vector< Timer > > * timer_tick_trigger_
Definition voice_assistant.h:254

esphome::voice_assistant::VoiceAssistant::clear_buffers_
void clear_buffers_()
Definition voice_assistant.cpp:117

esphome::voice_assistant::VoiceAssistant::setup
void setup() override
Definition voice_assistant.cpp:31

esphome::voice_assistant::VoiceAssistant::allocate_buffers_
bool allocate_buffers_()
Definition voice_assistant.cpp:85

esphome::voice_assistant::VoiceAssistant::ring_buffer_
std::shared_ptr< RingBuffer > ring_buffer_
Definition voice_assistant.h:281

esphome::voice_assistant::VoiceAssistant::noise_suppression_level_
uint8_t noise_suppression_level_
Definition voice_assistant.h:284

esphome::voice_assistant::VoiceAssistant::on_event
void on_event(const api::VoiceAssistantEventResponse &msg)
Definition voice_assistant.cpp:597

esphome::voice_assistant::VoiceAssistant::intent_end_trigger_
Trigger * intent_end_trigger_
Definition voice_assistant.h:225

esphome::voice_assistant::VoiceAssistant::client_disconnected_trigger_
Trigger * client_disconnected_trigger_
Definition voice_assistant.h:244

esphome::voice_assistant::VoiceAssistant::wake_word_detected_trigger_
Trigger * wake_word_detected_trigger_
Definition voice_assistant.h:236

esphome::voice_assistant::VoiceAssistant::conversation_timeout_
uint32_t conversation_timeout_
Definition voice_assistant.h:287

esphome::voice_assistant::VoiceAssistant::media_player_wait_for_announcement_start_
bool media_player_wait_for_announcement_start_
Definition voice_assistant.h:271

esphome::voice_assistant::VoiceAssistant::stt_vad_start_trigger_
Trigger * stt_vad_start_trigger_
Definition voice_assistant.h:230

esphome::voice_assistant::VoiceAssistant::tts_stream_start_trigger_
Trigger * tts_stream_start_trigger_
Definition voice_assistant.h:233

esphome::voice_assistant::VoiceAssistant::write_speaker_
void write_speaker_()
Definition voice_assistant.cpp:380

esphome::voice_assistant::VoiceAssistant::tts_start_trigger_
Trigger< std::string > * tts_start_trigger_
Definition voice_assistant.h:239

esphome::voice_assistant::VoiceAssistant::on_announce
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
Definition voice_assistant.cpp:840

esphome::voice_assistant::VoiceAssistant::start_trigger_
Trigger * start_trigger_
Definition voice_assistant.h:229

esphome::voice_assistant::VoiceAssistant::request_start
void request_start(bool continuous, bool silence_detection)
Definition voice_assistant.cpp:518

esphome::voice_assistant::VoiceAssistant::auto_gain_
uint8_t auto_gain_
Definition voice_assistant.h:285

esphome::voice_assistant::VoiceAssistant::api_client_
api::APIConnection * api_client_
Definition voice_assistant.h:246

esphome::voice_assistant::VoiceAssistant::tts_end_trigger_
Trigger< std::string > * tts_end_trigger_
Definition voice_assistant.h:238

esphome::voice_assistant::VoiceAssistant::request_stop
void request_stop()
Definition voice_assistant.cpp:533

esphome::voice_assistant::VoiceAssistant::config_
Configuration config_
Definition voice_assistant.h:303

esphome::voice_assistant::VoiceAssistant::dest_addr_
struct sockaddr_storage dest_addr_
Definition voice_assistant.h:223

esphome::voice_assistant::VoiceAssistant::continuous_
bool continuous_
Definition voice_assistant.h:291

esphome::voice_assistant::VoiceAssistant::timer_tick_
void timer_tick_()
Definition voice_assistant.cpp:827

esphome::voice_assistant::VoiceAssistant::speaker_
speaker::Speaker * speaker_
Definition voice_assistant.h:261

esphome::voice_assistant::VoiceAssistant::idle_trigger_
Trigger * idle_trigger_
Definition voice_assistant.h:241

esphome::voice_assistant::VoiceAssistant::tts_stream_end_trigger_
Trigger * tts_stream_end_trigger_
Definition voice_assistant.h:234

esphome::voice_assistant::VoiceAssistant::stream_ended_
bool stream_ended_
Definition voice_assistant.h:267

esphome::voice_assistant::VoiceAssistant::send_buffer_
uint8_t * send_buffer_
Definition voice_assistant.h:289

esphome::voice_assistant::VoiceAssistant::timer_finished_trigger_
Trigger< Timer > * timer_finished_trigger_
Definition voice_assistant.h:251

esphome::voice_assistant::VoiceAssistant::intent_start_trigger_
Trigger * intent_start_trigger_
Definition voice_assistant.h:226

esphome::voice_assistant::VoiceAssistant::start_playback_timeout_
void start_playback_timeout_()
Definition voice_assistant.cpp:586

esphome::voice_assistant::VoiceAssistant::reset_conversation_id
void reset_conversation_id()
Definition voice_assistant.cpp:157

esphome::voice_assistant::VoiceAssistant::conversation_id_
std::string conversation_id_
Definition voice_assistant.h:277

esphome::voice_assistant::VoiceAssistant::client_connected_trigger_
Trigger * client_connected_trigger_
Definition voice_assistant.h:243

esphome::voice_assistant::VoiceAssistant::mic_source_
microphone::MicrophoneSource * mic_source_
Definition voice_assistant.h:258

esphome::voice_assistant::VoiceAssistant::end_trigger_
Trigger * end_trigger_
Definition voice_assistant.h:228

esphome::voice_assistant::VoiceAssistant::micro_wake_word_
micro_wake_word::MicroWakeWord * micro_wake_word_
Definition voice_assistant.h:306

esphome::voice_assistant::VoiceAssistant::media_player_wait_for_announcement_end_
bool media_player_wait_for_announcement_end_
Definition voice_assistant.h:272

esphome::voice_assistant::VoiceAssistant::speaker_bytes_received_
size_t speaker_bytes_received_
Definition voice_assistant.h:265

esphome::voice_assistant::VoiceAssistant::timer_updated_trigger_
Trigger< Timer > * timer_updated_trigger_
Definition voice_assistant.h:252

esphome::voice_assistant::VoiceAssistant::continue_conversation_
bool continue_conversation_
Definition voice_assistant.h:294

esphome::voice_assistant::VoiceAssistant::failed_to_start
void failed_to_start()
Definition voice_assistant.cpp:466

esphome::voice_assistant::VoiceAssistant::on_set_configuration
void on_set_configuration(const std::vector< std::string > &active_wake_words)
Definition voice_assistant.cpp:872

esphome::voice_assistant::VoiceAssistant::listening_trigger_
Trigger * listening_trigger_
Definition voice_assistant.h:227

esphome::voice_assistant::VoiceAssistant::timer_tick_running_
bool timer_tick_running_
Definition voice_assistant.h:256

esphome::voice_assistant::VoiceAssistant::desired_state_
State desired_state_
Definition voice_assistant.h:297

defines.h

state
bool state
Definition fan.h:0

socklen_t
uint32_t socklen_t
Definition headers.h:97

ssize_t
__int64 ssize_t
Definition httplib.h:175

log.h

esphome::api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD
@ VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD
Definition api_pb2.h:192

esphome::api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD
@ VOICE_ASSISTANT_REQUEST_USE_VAD
Definition api_pb2.h:191

esphome::api::enums::VOICE_ASSISTANT_TIMER_UPDATED
@ VOICE_ASSISTANT_TIMER_UPDATED
Definition api_pb2.h:213

esphome::api::enums::VOICE_ASSISTANT_TIMER_STARTED
@ VOICE_ASSISTANT_TIMER_STARTED
Definition api_pb2.h:212

esphome::api::enums::VOICE_ASSISTANT_TIMER_FINISHED
@ VOICE_ASSISTANT_TIMER_FINISHED
Definition api_pb2.h:215

esphome::api::enums::VOICE_ASSISTANT_TIMER_CANCELLED
@ VOICE_ASSISTANT_TIMER_CANCELLED
Definition api_pb2.h:214

esphome::api::enums::VOICE_ASSISTANT_INTENT_END
@ VOICE_ASSISTANT_INTENT_END
Definition api_pb2.h:201

esphome::api::enums::VOICE_ASSISTANT_RUN_START
@ VOICE_ASSISTANT_RUN_START
Definition api_pb2.h:196

esphome::api::enums::VOICE_ASSISTANT_TTS_END
@ VOICE_ASSISTANT_TTS_END
Definition api_pb2.h:203

esphome::api::enums::VOICE_ASSISTANT_RUN_END
@ VOICE_ASSISTANT_RUN_END
Definition api_pb2.h:197

esphome::api::enums::VOICE_ASSISTANT_WAKE_WORD_START
@ VOICE_ASSISTANT_WAKE_WORD_START
Definition api_pb2.h:204

esphome::api::enums::VOICE_ASSISTANT_TTS_STREAM_END
@ VOICE_ASSISTANT_TTS_STREAM_END
Definition api_pb2.h:209

esphome::api::enums::VOICE_ASSISTANT_STT_END
@ VOICE_ASSISTANT_STT_END
Definition api_pb2.h:199

esphome::api::enums::VOICE_ASSISTANT_STT_VAD_START
@ VOICE_ASSISTANT_STT_VAD_START
Definition api_pb2.h:206

esphome::api::enums::VOICE_ASSISTANT_TTS_START
@ VOICE_ASSISTANT_TTS_START
Definition api_pb2.h:202

esphome::api::enums::VOICE_ASSISTANT_STT_START
@ VOICE_ASSISTANT_STT_START
Definition api_pb2.h:198

esphome::api::enums::VOICE_ASSISTANT_INTENT_START
@ VOICE_ASSISTANT_INTENT_START
Definition api_pb2.h:200

esphome::api::enums::VOICE_ASSISTANT_TTS_STREAM_START
@ VOICE_ASSISTANT_TTS_STREAM_START
Definition api_pb2.h:208

esphome::api::enums::VOICE_ASSISTANT_ERROR
@ VOICE_ASSISTANT_ERROR
Definition api_pb2.h:195

esphome::api::enums::VOICE_ASSISTANT_STT_VAD_END
@ VOICE_ASSISTANT_STT_VAD_END
Definition api_pb2.h:207

esphome::api::enums::VOICE_ASSISTANT_WAKE_WORD_END
@ VOICE_ASSISTANT_WAKE_WORD_END
Definition api_pb2.h:205

esphome::media_player::MEDIA_PLAYER_STATE_ANNOUNCING
@ MEDIA_PLAYER_STATE_ANNOUNCING
Definition media_player.h:14

esphome::media_player::MEDIA_PLAYER_COMMAND_ENQUEUE
@ MEDIA_PLAYER_COMMAND_ENQUEUE
Definition media_player.h:27

esphome::media_player::MEDIA_PLAYER_COMMAND_STOP
@ MEDIA_PLAYER_COMMAND_STOP
Definition media_player.h:21

esphome::setup_priority::AFTER_CONNECTION
const float AFTER_CONNECTION
For components that should be initialized after a data connection (API/MQTT) is connected.
Definition component.cpp:27

esphome::socket::socket
std::unique_ptr< Socket > socket(int domain, int type, int protocol)
Create a socket of the given domain, type and protocol.
Definition bsd_sockets_impl.cpp:134

esphome::socket::set_sockaddr_any
socklen_t set_sockaddr_any(struct sockaddr *addr, socklen_t addrlen, uint16_t port)
Set a sockaddr to the any address and specified port for the IP version used by socket_ip().
Definition socket.cpp:51

esphome::voice_assistant::AUDIO_MODE_UDP
@ AUDIO_MODE_UDP
Definition voice_assistant.h:63

esphome::voice_assistant::AUDIO_MODE_API
@ AUDIO_MODE_API
Definition voice_assistant.h:64

esphome::voice_assistant::State
State
Definition voice_assistant.h:46

esphome::voice_assistant::State::STREAMING_RESPONSE
@ STREAMING_RESPONSE

esphome::voice_assistant::State::STOPPING_MICROPHONE
@ STOPPING_MICROPHONE

esphome::voice_assistant::State::STOP_MICROPHONE
@ STOP_MICROPHONE

esphome::voice_assistant::State::WAIT_FOR_VAD
@ WAIT_FOR_VAD

esphome::voice_assistant::State::WAITING_FOR_VAD
@ WAITING_FOR_VAD

esphome::voice_assistant::State::STARTING_PIPELINE
@ STARTING_PIPELINE

esphome::voice_assistant::State::IDLE
@ IDLE

esphome::voice_assistant::State::STREAMING_MICROPHONE
@ STREAMING_MICROPHONE

esphome::voice_assistant::State::AWAITING_RESPONSE
@ AWAITING_RESPONSE

esphome::voice_assistant::State::STARTING_MICROPHONE
@ STARTING_MICROPHONE

esphome::voice_assistant::State::START_PIPELINE
@ START_PIPELINE

esphome::voice_assistant::State::RESPONSE_FINISHED
@ RESPONSE_FINISHED

esphome::voice_assistant::State::START_MICROPHONE
@ START_MICROPHONE

esphome::voice_assistant::global_voice_assistant
VoiceAssistant * global_voice_assistant
Definition voice_assistant.cpp:925

esphome
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7

esphome::voice_assistant::Configuration
Definition voice_assistant.h:87

esphome::voice_assistant::Configuration::available_wake_words
std::vector< WakeWord > available_wake_words
Definition voice_assistant.h:88

esphome::voice_assistant::Configuration::active_wake_words
std::vector< std::string > active_wake_words
Definition voice_assistant.h:89

esphome::voice_assistant::Configuration::max_active_wake_words
uint32_t max_active_wake_words
Definition voice_assistant.h:90

esphome::voice_assistant::Timer
Definition voice_assistant.h:67

esphome::voice_assistant::Timer::id
std::string id
Definition voice_assistant.h:68

esphome::voice_assistant::Timer::to_string
std::string to_string() const
Definition voice_assistant.h:74

esphome::voice_assistant::WakeWord
Definition voice_assistant.h:81

esphome::voice_assistant::WakeWord::wake_word
std::string wake_word
Definition voice_assistant.h:83

esphome::voice_assistant::WakeWord::trained_languages
std::vector< std::string > trained_languages
Definition voice_assistant.h:84

esphome::voice_assistant::WakeWord::id
std::string id
Definition voice_assistant.h:82

sockaddr_in6
Definition headers.h:72

sockaddr_in
Definition headers.h:61

sockaddr_storage
Definition headers.h:90

sockaddr_storage::ss_family
sa_family_t ss_family
Definition headers.h:92

sockaddr
Definition headers.h:83

voice_assistant.h