// ESPHome: esphome/components/voice_assistant/voice_assistant.cpp

#include "voice_assistant.h"

#include "esphome/core/defines.h"


#ifdef USE_VOICE_ASSISTANT


#include "esphome/components/socket/socket.h"

#include "esphome/core/log.h"


#include <cinttypes>

#include <cstdio>


namespace esphome {


namespace voice_assistant {


static const char *const TAG = "voice_assistant";


#ifdef SAMPLE_RATE_HZ

#undef SAMPLE_RATE_HZ

#endif


// Sample rate, in Hz, from which the audio buffer sizes below are derived.
static const size_t SAMPLE_RATE_HZ = 16000;


// Ring buffer capacity in samples.
static const size_t RING_BUFFER_SAMPLES = 512 * SAMPLE_RATE_HZ / 1000;  // 512 ms * 16 kHz / 1000 ms

// Ring buffer capacity in bytes (one int16_t per sample).
static const size_t RING_BUFFER_SIZE = RING_BUFFER_SAMPLES * sizeof(int16_t);

// Number of samples sent per outgoing audio chunk.
static const size_t SEND_BUFFER_SAMPLES = 32 * SAMPLE_RATE_HZ / 1000;  // 32 ms * 16 kHz / 1000 ms

// Size in bytes of one outgoing audio chunk (one int16_t per sample).
static const size_t SEND_BUFFER_SIZE = SEND_BUFFER_SAMPLES * sizeof(int16_t);

// Maximum number of bytes read from the UDP socket in one call.
static const size_t RECEIVE_SIZE = 1024;

// Size in bytes of the buffer that stages received audio before it is handed to the speaker.
static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;


// Publishes the newly constructed instance through the global_voice_assistant pointer.
VoiceAssistant::VoiceAssistant() {
  global_voice_assistant = this;
}


// Registers the microphone data callback and, when a media player is configured, the state callback
// that tracks playback of the TTS response.
void VoiceAssistant::setup() {
  this->mic_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
    // Keep a local reference for the duration of the write. With this copy alive, a use count above
    // one means the member currently refers to a ring buffer; otherwise there is nothing to write to.
    std::shared_ptr<RingBuffer> ring = this->ring_buffer_;
    if (this->ring_buffer_.use_count() <= 1) {
      return;
    }
    ring->write((void *) data.data(), data.size());
  });

#ifdef USE_MEDIA_PLAYER
  if (this->media_player_ != nullptr) {
    this->media_player_->add_on_state_callback([this]() {
      const bool announcing =
          this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING;
      if (announcing) {
        // The player started announcing after it was handed the URL.
        if (this->media_player_response_state_ == MediaPlayerResponseState::URL_SENT) {
          this->media_player_response_state_ = MediaPlayerResponseState::PLAYING;
        }
        return;
      }
      // Any other player state while we were playing means the TTS announcement is over.
      if (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING) {
        this->media_player_response_state_ = MediaPlayerResponseState::FINISHED;
      }
    });
  }
#endif
}


// Reports the setup priority of this component: setup_priority::AFTER_CONNECTION.
float VoiceAssistant::get_setup_priority() const {
  return setup_priority::AFTER_CONNECTION;
}


// Creates the non-blocking UDP socket used for audio streaming and, when a speaker is configured,
// binds it to port 6055 so response audio can be received.
//
// Returns true once the socket is ready (udp_socket_running_ is then set). On any fatal error the
// component is marked as failed and false is returned.
bool VoiceAssistant::start_udp_socket_() {
  this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
  if (this->socket_ == nullptr) {
    ESP_LOGE(TAG, "Could not create socket");
    this->mark_failed();
    return false;
  }

  int enable = 1;
  int err = this->socket_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
  if (err != 0) {
    // Report errno (not the call's return value), matching the other error logs in this function.
    ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", errno);
    // we can still continue
  }

  err = this->socket_->setblocking(false);
  if (err != 0) {
    ESP_LOGE(TAG, "Socket unable to set nonblocking mode: errno %d", errno);
    this->mark_failed();
    return false;
  }

#ifdef USE_SPEAKER
  if (this->speaker_ != nullptr) {
    struct sockaddr_storage server;

    socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), 6055);
    if (sl == 0) {
      ESP_LOGE(TAG, "Socket unable to set sockaddr: errno %d", errno);
      this->mark_failed();
      return false;
    }

    // Pass the address length reported by set_sockaddr_any() rather than the size of the generic
    // sockaddr_storage container.
    err = this->socket_->bind((struct sockaddr *) &server, sl);
    if (err != 0) {
      ESP_LOGE(TAG, "Socket unable to bind: errno %d", errno);
      this->mark_failed();
      return false;
    }
  }
#endif

  this->udp_socket_running_ = true;
  return true;
}


bool VoiceAssistant::allocate_buffers_() {

#ifdef USE_SPEAKER

  if ((this->speaker_ != nullptr) && (this->speaker_buffer_ == nullptr)) {

    RAMAllocator<uint8_t> speaker_allocator;

    this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE);

    if (this->speaker_buffer_ == nullptr) {

      ESP_LOGW(TAG, "Could not allocate speaker buffer");

      return false;

    }

  }

#endif


  if (this->ring_buffer_.use_count() == 0) {

    this->ring_buffer_ = RingBuffer::create(RING_BUFFER_SIZE);

    if (this->ring_buffer_.use_count() == 0) {

      ESP_LOGE(TAG, "Could not allocate ring buffer");

      return false;

    }

  }


  if (this->send_buffer_ == nullptr) {

    RAMAllocator<uint8_t> send_allocator;

    this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE);

    if (send_buffer_ == nullptr) {

      ESP_LOGW(TAG, "Could not allocate send buffer");

      return false;

    }

  }


  return true;

}


// Zeroes or resets the contents of every allocated buffer without freeing any of them.
void VoiceAssistant::clear_buffers_() {
  const bool has_send_buffer = this->send_buffer_ != nullptr;
  if (has_send_buffer) {
    memset(this->send_buffer_, 0, SEND_BUFFER_SIZE);
  }

  const bool has_ring_buffer = this->ring_buffer_ != nullptr;
  if (has_ring_buffer) {
    this->ring_buffer_->reset();
  }

#ifdef USE_SPEAKER
  const bool has_speaker_buffer = (this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr);
  if (has_speaker_buffer) {
    memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);

    // Forget all bookkeeping about audio that had been staged for the speaker.
    this->speaker_bytes_received_ = 0;
    this->speaker_buffer_index_ = 0;
    this->speaker_buffer_size_ = 0;
  }
#endif
}


void VoiceAssistant::deallocate_buffers_() {

  if (this->send_buffer_ != nullptr) {

    RAMAllocator<uint8_t> send_deallocator;

    send_deallocator.deallocate(this->send_buffer_, SEND_BUFFER_SIZE);

    this->send_buffer_ = nullptr;

  }


  if (this->ring_buffer_.use_count() > 0) {

    this->ring_buffer_.reset();

  }


#ifdef USE_SPEAKER

  if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {

    RAMAllocator<uint8_t> speaker_deallocator;

    speaker_deallocator.deallocate(this->speaker_buffer_, SPEAKER_BUFFER_SIZE);

    this->speaker_buffer_ = nullptr;

  }

#endif

}


// Forgets the stored conversation ID and logs that it was reset.
void VoiceAssistant::reset_conversation_id() {
  this->conversation_id_.clear();
  ESP_LOGD(TAG, "reset conversation ID");
}


// Advances the voice assistant state machine by one step.
//
// If the API client has gone away while a session is in progress, the session is wound down first.
// Otherwise the handler for the current state runs; most handlers either start or stop a resource
// and move to the next state, or move data between the buffers and the API/UDP/speaker endpoints.
void VoiceAssistant::loop() {
  const bool already_winding_down = this->state_ == State::IDLE || this->state_ == State::STOP_MICROPHONE ||
                                    this->state_ == State::STOPPING_MICROPHONE;
  if (this->api_client_ == nullptr && !already_winding_down) {
    // No client to talk to any more: stop the microphone if it is (being) started, otherwise go
    // straight to idle.
    const bool mic_in_use = this->mic_source_->is_running() || this->state_ == State::STARTING_MICROPHONE;
    this->set_state_(mic_in_use ? State::STOP_MICROPHONE : State::IDLE, State::IDLE);
    this->continuous_ = false;
    this->signal_stop_();
    this->clear_buffers_();
    return;
  }

  switch (this->state_) {
    case State::IDLE: {
      const bool restart_session = this->continuous_ && this->desired_state_ == State::IDLE;
      if (!restart_session) {
        this->deallocate_buffers_();
        break;
      }
      this->idle_trigger_.trigger();
      this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
      break;
    }

    case State::START_MICROPHONE: {
      ESP_LOGD(TAG, "Starting Microphone");
      if (!this->allocate_buffers_()) {
        this->status_set_error(LOG_STR("Failed to allocate buffers"));
        return;
      }
      if (this->status_has_error()) {
        this->status_clear_error();
      }
      this->clear_buffers_();

      this->mic_source_->start();
      this->set_state_(State::STARTING_MICROPHONE);
      break;
    }

    case State::STARTING_MICROPHONE: {
      // Wait until the microphone reports that it is running, then continue to the desired state.
      if (this->mic_source_->is_running()) {
        this->set_state_(this->desired_state_);
      }
      break;
    }

    case State::START_PIPELINE: {
      ESP_LOGD(TAG, "Requesting start");

      uint32_t request_flags = 0;
      if (!this->continue_conversation_ && this->use_wake_word_) {
        request_flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD;
      }
      if (this->silence_detection_) {
        request_flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD;
      }

      api::VoiceAssistantAudioSettings audio_settings;
      audio_settings.noise_suppression_level = this->noise_suppression_level_;
      audio_settings.auto_gain = this->auto_gain_;
      audio_settings.volume_multiplier = this->volume_multiplier_;

      api::VoiceAssistantRequest msg;
      msg.start = true;
      msg.conversation_id = StringRef(this->conversation_id_);
      msg.flags = request_flags;
      msg.audio_settings = audio_settings;
      msg.wake_word_phrase = StringRef(this->wake_word_);

      // Reset media player state tracking
#ifdef USE_MEDIA_PLAYER
      if (this->media_player_ != nullptr) {
        this->media_player_response_state_ = MediaPlayerResponseState::IDLE;
      }
#endif

      if (this->api_client_ == nullptr ||
          !this->api_client_->send_message(msg, api::VoiceAssistantRequest::MESSAGE_TYPE)) {
        ESP_LOGW(TAG, "Could not request start");
        this->error_trigger_.trigger("not-connected", "Could not request start");
        this->continuous_ = false;
        this->set_state_(State::IDLE, State::IDLE);
        break;
      }

      this->set_state_(State::STARTING_PIPELINE);
      // (Re)arm the timeout that drops the conversation ID after conversation_timeout_.
      this->set_timeout("reset-conversation_id", this->conversation_timeout_,
                        [this]() { this->reset_conversation_id(); });
      break;
    }

    case State::STARTING_PIPELINE: {
      break;  // State changed when udp server port received
    }

    case State::STREAMING_MICROPHONE: {
      // Drain the ring buffer one full send-buffer-sized chunk at a time.
      while (this->ring_buffer_->available() >= SEND_BUFFER_SIZE) {
        size_t chunk_len = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
        if (this->audio_mode_ == AUDIO_MODE_API) {
          api::VoiceAssistantAudio msg;
          msg.data = this->send_buffer_;
          msg.data_len = chunk_len;
          this->api_client_->send_message(msg, api::VoiceAssistantAudio::MESSAGE_TYPE);
        } else {
          // The UDP socket is opened lazily, the first time there is something to send.
          if (!this->udp_socket_running_ && !this->start_udp_socket_()) {
            this->set_state_(State::STOP_MICROPHONE, State::IDLE);
            break;
          }
          this->socket_->sendto(this->send_buffer_, chunk_len, 0, (struct sockaddr *) &this->dest_addr_,
                                sizeof(this->dest_addr_));
        }
      }

      break;
    }

    case State::STOP_MICROPHONE: {
      if (!this->mic_source_->is_running()) {
        this->set_state_(this->desired_state_);
        break;
      }
      this->mic_source_->stop();
      this->set_state_(State::STOPPING_MICROPHONE);
      break;
    }

    case State::STOPPING_MICROPHONE: {
      // Wait until the microphone reports that it has stopped, then continue to the desired state.
      if (this->mic_source_->is_stopped()) {
        this->set_state_(this->desired_state_);
      }
      break;
    }

    case State::AWAITING_RESPONSE: {
      break;  // State changed by events
    }

    case State::STREAMING_RESPONSE: {
      bool is_playing = false;
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        ssize_t rx_len = 0;
        if (this->audio_mode_ == AUDIO_MODE_UDP) {
          const bool room_left = this->speaker_buffer_index_ + RECEIVE_SIZE < SPEAKER_BUFFER_SIZE;
          if (!room_left) {
            ESP_LOGD(TAG, "Receive buffer full");
          } else {
            rx_len = this->socket_->read(this->speaker_buffer_ + this->speaker_buffer_index_, RECEIVE_SIZE);
            if (rx_len > 0) {
              this->speaker_buffer_index_ += rx_len;
              this->speaker_buffer_size_ += rx_len;
              this->speaker_bytes_received_ += rx_len;
            }
          }
        }

        // Build a small buffer of audio before sending to the speaker
        const bool stream_done = this->stream_ended_ && (this->audio_mode_ == AUDIO_MODE_API || rx_len < 0);
        if (this->speaker_bytes_received_ > RECEIVE_SIZE * 4 || stream_done) {
          this->write_speaker_();
        }

        if (this->wait_for_stream_end_) {
          this->cancel_timeout("playing");
          if (stream_done) {
            ESP_LOGD(TAG, "End of audio stream received");
            this->cancel_timeout("speaker-timeout");
            this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);
          }
          break;  // We dont want to timeout here as the STREAM_END event will take care of that.
        }

        is_playing = this->speaker_->is_running();
      }
#endif
#ifdef USE_MEDIA_PLAYER
      if (this->media_player_ != nullptr) {
        is_playing = (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING);

        if (this->media_player_response_state_ == MediaPlayerResponseState::FINISHED) {
          this->media_player_response_state_ = MediaPlayerResponseState::IDLE;
          this->cancel_timeout("playing");
          ESP_LOGD(TAG, "Announcement finished playing");
          this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);

          api::VoiceAssistantAnnounceFinished msg;
          msg.success = true;
          this->api_client_->send_message(msg, api::VoiceAssistantAnnounceFinished::MESSAGE_TYPE);
          break;
        }
      }
#endif
      // While something is playing, keep pushing the "playing" timeout into the future.
      if (is_playing) {
        this->start_playback_timeout_();
      }
      break;
    }

    case State::RESPONSE_FINISHED: {
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        // First flush whatever is still staged, then wait for the speaker to finish playing it.
        if (this->speaker_buffer_size_ > 0) {
          this->write_speaker_();
          break;
        }
        if (this->speaker_->has_buffered_data() || this->speaker_->is_running()) {
          break;
        }

        ESP_LOGD(TAG, "Speaker has finished outputting all audio");
        this->speaker_->stop();
        this->cancel_timeout("speaker-timeout");
        this->cancel_timeout("playing");

        this->clear_buffers_();

        this->wait_for_stream_end_ = false;
        this->stream_ended_ = false;

        this->tts_stream_end_trigger_.trigger();
      }
#endif
      if (this->continue_conversation_) {
        this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
      } else {
        this->set_state_(State::IDLE, State::IDLE);
      }
      break;
    }

    default:
      break;
  }
}


#ifdef USE_SPEAKER


// Hands up to 4 KiB of staged audio to the speaker and compacts the staging buffer by however many
// bytes the speaker accepted. Does nothing without a speaker, a buffer, or staged audio.
void VoiceAssistant::write_speaker_() {
  if ((this->speaker_ == nullptr) || (this->speaker_buffer_ == nullptr)) {
    return;
  }

  const bool has_staged_audio = this->speaker_buffer_size_ > 0;
  if (!has_staged_audio) {
    return;
  }

  size_t chunk = std::min<size_t>(this->speaker_buffer_size_, 4 * 1024);
  size_t accepted = this->speaker_->play(this->speaker_buffer_, chunk);
  if (accepted == 0) {
    ESP_LOGV(TAG, "Speaker buffer full, trying again next loop");
    return;
  }

  // Shift the not-yet-played remainder to the front of the buffer.
  memmove(this->speaker_buffer_, this->speaker_buffer_ + accepted, this->speaker_buffer_size_ - accepted);
  this->speaker_buffer_size_ -= accepted;
  this->speaker_buffer_index_ -= accepted;
  // (Re)arm the timeout that stops the speaker 5 s after the last successful write.
  this->set_timeout("speaker-timeout", 5000, [this]() { this->speaker_->stop(); });
}


#endif


// Subscribes or unsubscribes an API connection as the single client of the voice assistant.
//
// client:    the connection asking to (un)subscribe.
// subscribe: true to become the active client, false to release it.
//
// Only one client is accepted at a time, and only the active client may unsubscribe; other requests
// are logged as errors and ignored.
void VoiceAssistant::client_subscription(api::APIConnection *client, bool subscribe) {
  if (subscribe) {
    if (this->api_client_ == nullptr) {
      this->api_client_ = client;
      this->client_connected_trigger_.trigger();
      return;
    }

    // Somebody else is already subscribed; report both parties and keep the current one.
    char current_peername[socket::SOCKADDR_STR_LEN];
    char new_peername[socket::SOCKADDR_STR_LEN];
    ESP_LOGE(TAG,
             "Multiple API Clients attempting to connect to Voice Assistant\n"
             "  Current client: %s (%s)\n"
             "  New client: %s (%s)",
             this->api_client_->get_name(), this->api_client_->get_peername_to(current_peername), client->get_name(),
             client->get_peername_to(new_peername));
    return;
  }

  const bool is_active_client = !(this->api_client_ == nullptr || client != this->api_client_);
  if (!is_active_client) {
    ESP_LOGE(TAG, "Client attempting to unsubscribe that is not the current API Client");
    return;
  }

  this->api_client_ = nullptr;
  this->client_disconnected_trigger_.trigger();
}


// Maps a State value to its name for log output; values without a case yield "UNKNOWN".
static const LogString *voice_assistant_state_to_string(State state) {
  switch (state) {
    case State::IDLE:
      return LOG_STR("IDLE");
    case State::START_MICROPHONE:
      return LOG_STR("START_MICROPHONE");
    case State::STARTING_MICROPHONE:
      return LOG_STR("STARTING_MICROPHONE");
    case State::WAIT_FOR_VAD:
      return LOG_STR("WAIT_FOR_VAD");
    case State::WAITING_FOR_VAD:
      return LOG_STR("WAITING_FOR_VAD");
    case State::START_PIPELINE:
      return LOG_STR("START_PIPELINE");
    case State::STARTING_PIPELINE:
      return LOG_STR("STARTING_PIPELINE");
    case State::STREAMING_MICROPHONE:
      return LOG_STR("STREAMING_MICROPHONE");
    case State::STOP_MICROPHONE:
      return LOG_STR("STOP_MICROPHONE");
    case State::STOPPING_MICROPHONE:
      return LOG_STR("STOPPING_MICROPHONE");
    case State::AWAITING_RESPONSE:
      return LOG_STR("AWAITING_RESPONSE");
    case State::STREAMING_RESPONSE:
      return LOG_STR("STREAMING_RESPONSE");
    case State::RESPONSE_FINISHED:
      return LOG_STR("RESPONSE_FINISHED");
    default:
      return LOG_STR("UNKNOWN");
  }
}


// Switches the state machine to `state` and logs the transition from the previous state.
void VoiceAssistant::set_state_(State state) {
  const State previous = this->state_;
  this->state_ = state;
  ESP_LOGD(TAG, "State changed from %s to %s", LOG_STR_ARG(voice_assistant_state_to_string(previous)),
           LOG_STR_ARG(voice_assistant_state_to_string(state)));
}


// Switches the state machine to `state` and records `desired_state` as the state that the
// intermediate states should continue to; both changes are logged.
void VoiceAssistant::set_state_(State state, State desired_state) {
  this->set_state_(state);

  this->desired_state_ = desired_state;
  ESP_LOGD(TAG, "Desired state set to %s", LOG_STR_ARG(voice_assistant_state_to_string(desired_state)));
}


// Reports that the server side could not be started: logs the error, fires the error trigger with
// code "failed-to-start" and stops the microphone on the way back to idle.
void VoiceAssistant::failed_to_start() {
  ESP_LOGE(TAG, "Failed to start server. See Home Assistant logs for more details.");

  this->error_trigger_.trigger("failed-to-start", "Failed to start server. See Home Assistant logs for more details.");

  this->set_state_(State::STOP_MICROPHONE, State::IDLE);
}


void VoiceAssistant::start_streaming() {

  if (this->state_ != State::STARTING_PIPELINE) {

    this->signal_stop_();

    return;

  }


  ESP_LOGD(TAG, "Client started, streaming microphone");

  this->audio_mode_ = AUDIO_MODE_API;


  if (this->mic_source_->is_running()) {

    this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);

  } else {

    this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);

  }

}


void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t port) {

  if (this->state_ != State::STARTING_PIPELINE) {

    this->signal_stop_();

    return;

  }


  ESP_LOGD(TAG, "Client started, streaming microphone");

  this->audio_mode_ = AUDIO_MODE_UDP;


  memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));

  if (this->dest_addr_.ss_family == AF_INET) {

    ((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);

  }

#if LWIP_IPV6

  else if (this->dest_addr_.ss_family == AF_INET6) {

    ((struct sockaddr_in6 *) &this->dest_addr_)->sin6_port = htons(port);

  }

#endif

  else {

    ESP_LOGW(TAG, "Unknown address family: %d", this->dest_addr_.ss_family);

    return;

  }


  if (this->mic_source_->is_running()) {

    this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);

  } else {

    this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);

  }

}


// Requests a new voice session.
//
// continuous:        stored in continuous_ for this session.
// silence_detection: stored in silence_detection_ for this session.
//
// Without a connected API client the request is rejected and the state is forced to idle. With a
// client, the request only takes effect when the state machine is currently idle.
void VoiceAssistant::request_start(bool continuous, bool silence_detection) {
  if (this->api_client_ == nullptr) {
    ESP_LOGE(TAG, "No API client connected");
    this->set_state_(State::IDLE, State::IDLE);
    this->continuous_ = false;
    return;
  }

  if (this->state_ != State::IDLE) {
    return;
  }

  this->continuous_ = continuous;
  this->silence_detection_ = silence_detection;

  this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
}


// Requests that the current voice session ends.
//
// Continuous mode and conversation continuation are always switched off. What else happens depends
// on the current state: the microphone is stopped, Home Assistant is told to stop, a running media
// player announcement is stopped, or nothing at all when the session is already finishing.
void VoiceAssistant::request_stop() {
  this->continuous_ = false;
  this->continue_conversation_ = false;

  switch (this->state_) {
    case State::IDLE:
      break;

    // Nothing was requested from Home Assistant yet: just stop the microphone.
    case State::START_MICROPHONE:
    case State::STARTING_MICROPHONE:
    case State::WAIT_FOR_VAD:
    case State::WAITING_FOR_VAD:
    case State::START_PIPELINE:
      this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      break;

    // A pipeline start was already requested: signal stop as well as stopping the microphone.
    case State::STARTING_PIPELINE:
    case State::STREAMING_MICROPHONE:
      this->signal_stop_();
      this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      break;

    // The microphone is already being stopped: only make sure we end up idle afterwards.
    case State::STOP_MICROPHONE:
    case State::STOPPING_MICROPHONE:
      this->desired_state_ = State::IDLE;
      break;

    case State::AWAITING_RESPONSE:
      this->signal_stop_();
      break;

    case State::STREAMING_RESPONSE:
#ifdef USE_MEDIA_PLAYER
      // Stop any ongoing media player announcement
      if (this->media_player_ != nullptr) {
        this->media_player_->make_call()
            .set_command(media_player::MEDIA_PLAYER_COMMAND_STOP)
            .set_announcement(true)
            .perform();
      }
      if (this->started_streaming_tts_) {
        // Haven't reached the TTS_END stage, so send the stop signal to HA.
        this->signal_stop_();
      }
#endif
      break;

    case State::RESPONSE_FINISHED:
      break;  // Let the incoming audio stream finish then it will go to idle.
  }
}


void VoiceAssistant::signal_stop_() {

  memset(&this->dest_addr_, 0, sizeof(this->dest_addr_));

  if (this->api_client_ == nullptr) {

    return;

  }

  ESP_LOGD(TAG, "Signaling stop");

  api::VoiceAssistantRequest msg;

  msg.start = false;

  this->api_client_->send_message(msg, api::VoiceAssistantRequest::MESSAGE_TYPE);

}


// (Re)arms the 2 s "playing" timeout.
//
// When the timeout fires, the "speaker-timeout" timeout is cancelled, the state machine moves to
// RESPONSE_FINISHED and, if an API client is still connected, it is told that the announcement
// finished.
void VoiceAssistant::start_playback_timeout_() {
  this->set_timeout("playing", 2000, [this]() {
    this->cancel_timeout("speaker-timeout");
    this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);

    // The client may have unsubscribed while the timeout was pending (api_client_ is reset to
    // nullptr in that case), so it must not be dereferenced unconditionally here.
    if (this->api_client_ == nullptr) {
      return;
    }

    api::VoiceAssistantAnnounceFinished msg;
    msg.success = true;
    this->api_client_->send_message(msg, api::VoiceAssistantAnnounceFinished::MESSAGE_TYPE);
  });
}


// Handles one pipeline event message.
//
// msg: the event; msg.event_type selects the handler and msg.data carries name/value arguments.
//
// Most events log a message and defer the matching automation trigger. Some additionally extract
// arguments from msg.data, start media player or speaker playback, or move the state machine.
void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
  ESP_LOGD(TAG, "Event Type: %" PRId32, msg.event_type);

  switch (msg.event_type) {
    case api::enums::VOICE_ASSISTANT_RUN_START: {
      ESP_LOGD(TAG, "Assist Pipeline running");
#ifdef USE_MEDIA_PLAYER
      // Remember the response URL, if one was supplied, for a later streaming start.
      this->started_streaming_tts_ = false;
      for (const auto &arg : msg.data) {
        if (arg.name == "url") {
          this->tts_response_url_ = arg.value;
        }
      }
#endif
      this->defer([this]() { this->start_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_WAKE_WORD_START:
      break;

    case api::enums::VOICE_ASSISTANT_WAKE_WORD_END: {
      ESP_LOGD(TAG, "Wake word detected");
      this->defer([this]() { this->wake_word_detected_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_STT_START: {
      ESP_LOGD(TAG, "STT started");
      this->defer([this]() { this->listening_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_STT_END: {
      std::string recognised;
      for (const auto &arg : msg.data) {
        if (arg.name == "text") {
          recognised = arg.value;
        }
      }
      if (recognised.empty()) {
        ESP_LOGW(TAG, "No text in STT_END event");
        return;
      }
      // Cap the text at 500 characters, marking the cut with an ellipsis.
      if (recognised.length() > 500) {
        recognised.resize(497);
        recognised += "...";
      }
      ESP_LOGD(TAG, "Speech recognised as: \"%s\"", recognised.c_str());
      this->defer([this, recognised]() { this->stt_end_trigger_.trigger(recognised); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_INTENT_START: {
      ESP_LOGD(TAG, "Intent started");
      this->defer([this]() { this->intent_start_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_INTENT_PROGRESS: {
      ESP_LOGD(TAG, "Intent progress");
      std::string streamed_url;
#ifdef USE_MEDIA_PLAYER
      if (this->media_player_ != nullptr) {
        for (const auto &arg : msg.data) {
          const bool begin_streaming =
              (arg.name == "tts_start_streaming") && (arg.value == "1") && !this->tts_response_url_.empty();
          if (!begin_streaming) {
            continue;
          }

          this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;

          this->media_player_->make_call().set_media_url(this->tts_response_url_).set_announcement(true).perform();

          this->started_streaming_tts_ = true;
          this->start_playback_timeout_();

          streamed_url = this->tts_response_url_;
          this->tts_response_url_.clear();  // Reset streaming URL
          this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE);
        }
      }
#endif
      this->defer([this, streamed_url]() { this->intent_progress_trigger_.trigger(streamed_url); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_INTENT_END: {
      for (const auto &arg : msg.data) {
        if (arg.name == "conversation_id") {
          this->conversation_id_ = arg.value;
        } else if (arg.name == "continue_conversation") {
          this->continue_conversation_ = (arg.value == "1");
        }
      }
      this->defer([this]() { this->intent_end_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_TTS_START: {
      std::string response_text;
      for (const auto &arg : msg.data) {
        if (arg.name == "text") {
          response_text = arg.value;
        }
      }
      if (response_text.empty()) {
        ESP_LOGW(TAG, "No text in TTS_START event");
        return;
      }
      // Cap the text at 500 characters, marking the cut with an ellipsis.
      if (response_text.length() > 500) {
        response_text.resize(497);
        response_text += "...";
      }
      ESP_LOGD(TAG, "Response: \"%s\"", response_text.c_str());
      this->defer([this, response_text]() {
        this->tts_start_trigger_.trigger(response_text);
#ifdef USE_SPEAKER
        if (this->speaker_ != nullptr) {
          this->speaker_->start();
        }
#endif
      });
      break;
    }

    case api::enums::VOICE_ASSISTANT_TTS_END: {
      std::string response_url;
      for (const auto &arg : msg.data) {
        if (arg.name == "url") {
          response_url = arg.value;
        }
      }
      if (response_url.empty()) {
        ESP_LOGW(TAG, "No url in TTS_END event");
        return;
      }
      ESP_LOGD(TAG, "Response URL: \"%s\"", response_url.c_str());
      this->defer([this, response_url]() {
#ifdef USE_MEDIA_PLAYER
        // Only hand the URL to the media player if streaming was not already started earlier.
        if ((this->media_player_ != nullptr) && (!this->started_streaming_tts_)) {
          this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;

          this->media_player_->make_call().set_media_url(response_url).set_announcement(true).perform();

          this->start_playback_timeout_();
        }
        this->started_streaming_tts_ = false;  // Helps indicate reaching the TTS_END stage
#endif
        this->tts_end_trigger_.trigger(response_url);
      });

      const State target = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
      // Don't needlessly change the state. The intent progress stage may have already changed the state to
      // streaming response.
      if (target != this->state_) {
        this->set_state_(target, target);
      }
      break;
    }

    case api::enums::VOICE_ASSISTANT_RUN_END: {
      ESP_LOGD(TAG, "Assist Pipeline ended");
      const bool mic_phase = (this->state_ == State::START_PIPELINE) || (this->state_ == State::STARTING_PIPELINE) ||
                             (this->state_ == State::STREAMING_MICROPHONE);
      if (mic_phase) {
        // Microphone is running, stop it
        this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      } else if (this->state_ == State::AWAITING_RESPONSE) {
        // No TTS start event ("nevermind")
        this->set_state_(State::IDLE, State::IDLE);
      }
      this->defer([this]() { this->end_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_ERROR: {
      std::string err_code;
      std::string err_message;
      for (const auto &arg : msg.data) {
        if (arg.name == "code") {
          err_code = arg.value;
        } else if (arg.name == "message") {
          err_message = arg.value;
        }
      }

      if (err_code == "wake-word-timeout" || err_code == "wake_word_detection_aborted" ||
          err_code == "no_wake_word") {
        // Don't change state here since either the "tts-end" or "run-end" events will do it.
        return;
      }
      if (err_code == "wake-provider-missing" || err_code == "wake-engine-missing") {
        // Wake word is not set up or not ready on Home Assistant so stop and do not retry until user starts again.
        this->defer([this, err_code, err_message]() {
          this->request_stop();
          this->error_trigger_.trigger(err_code, err_message);
        });
        return;
      }

      ESP_LOGE(TAG, "Error: %s - %s", err_code.c_str(), err_message.c_str());
      if (this->state_ != State::IDLE) {
        this->signal_stop_();
        this->set_state_(State::STOP_MICROPHONE, State::IDLE);
      }
      this->defer([this, err_code, err_message]() { this->error_trigger_.trigger(err_code, err_message); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_TTS_STREAM_START: {
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        this->wait_for_stream_end_ = true;
        ESP_LOGD(TAG, "TTS stream start");
        this->defer([this] { this->tts_stream_start_trigger_.trigger(); });
      }
#endif
      break;
    }

    case api::enums::VOICE_ASSISTANT_TTS_STREAM_END: {
#ifdef USE_SPEAKER
      if (this->speaker_ != nullptr) {
        this->stream_ended_ = true;
        ESP_LOGD(TAG, "TTS stream end");
      }
#endif
      break;
    }

    case api::enums::VOICE_ASSISTANT_STT_VAD_START: {
      ESP_LOGD(TAG, "Starting STT by VAD");
      this->defer([this]() { this->stt_vad_start_trigger_.trigger(); });
      break;
    }

    case api::enums::VOICE_ASSISTANT_STT_VAD_END: {
      ESP_LOGD(TAG, "STT by VAD end");
      this->set_state_(State::STOP_MICROPHONE, State::AWAITING_RESPONSE);
      this->defer([this]() { this->stt_vad_end_trigger_.trigger(); });
      break;
    }

    default:
      ESP_LOGD(TAG, "Unhandled event type: %" PRId32, msg.event_type);
      break;
  }
}


// Appends audio received over the API to the speaker staging buffer.
//
// msg: the audio chunk (msg.data, msg.data_len bytes).
//
// The chunk is dropped with an error log when it does not fit into the remaining buffer space.
void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) {
#ifdef USE_SPEAKER  // We should never get to this function if there is no speaker anyway
  if ((this->speaker_ == nullptr) || (this->speaker_buffer_ == nullptr)) {
    return;
  }

  const bool fits = this->speaker_buffer_index_ + msg.data_len < SPEAKER_BUFFER_SIZE;
  if (!fits) {
    ESP_LOGE(TAG, "Cannot receive audio, buffer is full");
    return;
  }

  memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data, msg.data_len);
  this->speaker_buffer_index_ += msg.data_len;
  this->speaker_buffer_size_ += msg.data_len;
  this->speaker_bytes_received_ += msg.data_len;
  ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data_len);
#endif
}


// Applies a timer event: updates (or creates) the matching entry in timers_, fires the trigger for
// the event type, removes cancelled/finished timers and starts or stops the once-per-second tick.
//
// msg: the timer event, carrying the timer's id, name, durations, active flag and the event type.
void VoiceAssistant::on_timer_event(const api::VoiceAssistantTimerEventResponse &msg) {
  // Find existing timer or add a new one
  auto entry = this->timers_.begin();
  while (entry != this->timers_.end() && !(entry->id == msg.timer_id)) {
    ++entry;
  }
  if (entry == this->timers_.end()) {
    this->timers_.push_back({});
    entry = this->timers_.end() - 1;
  }

  entry->id = msg.timer_id;
  entry->name = msg.name;
  entry->total_seconds = msg.total_seconds;
  entry->seconds_left = msg.seconds_left;
  entry->is_active = msg.is_active;

  char timer_buf[Timer::TO_STR_BUFFER_SIZE];
  ESP_LOGD(TAG,
           "Timer Event\n"
           "  Type: %" PRId32 "\n"
           "  %s",
           msg.event_type, entry->to_str(timer_buf));

  switch (msg.event_type) {
    case api::enums::VOICE_ASSISTANT_TIMER_STARTED:
      this->timer_started_trigger_.trigger(*entry);
      break;
    case api::enums::VOICE_ASSISTANT_TIMER_UPDATED:
      this->timer_updated_trigger_.trigger(*entry);
      break;
    // Cancelled and finished timers are reported and then forgotten.
    case api::enums::VOICE_ASSISTANT_TIMER_CANCELLED:
      this->timer_cancelled_trigger_.trigger(*entry);
      this->timers_.erase(entry);
      break;
    case api::enums::VOICE_ASSISTANT_TIMER_FINISHED:
      this->timer_finished_trigger_.trigger(*entry);
      this->timers_.erase(entry);
      break;
  }

  // The tick interval only runs while at least one timer is known.
  if (this->timers_.empty()) {
    this->cancel_interval("timer-event");
    this->timer_tick_running_ = false;
    return;
  }
  if (!this->timer_tick_running_) {
    this->set_interval("timer-event", 1000, [this]() { this->timer_tick_(); });
    this->timer_tick_running_ = true;
  }
}


/// Interval callback: counts down every active timer that still has time left by one second,
/// then fires the tick trigger with the full timer list.
void VoiceAssistant::timer_tick_() {
  for (auto &entry : this->timers_) {
    // Inactive timers and timers already at zero are left untouched
    const bool counting_down = entry.is_active && entry.seconds_left > 0;
    if (counting_down) {
      entry.seconds_left--;
    }
  }

  this->timer_tick_trigger_.trigger(this->timers_);
}


/// Handle an announce request by playing it through the media player.
///
/// Does nothing unless USE_MEDIA_PLAYER is defined and a media player is set. Otherwise it fires the
/// TTS start trigger, plays the optional pre-announce media followed by the announcement itself,
/// records whether the conversation should continue, arms the playback timeout, moves the state
/// machine towards STREAMING_RESPONSE and fires the TTS end and end triggers.
///
/// @param msg Announce request (text, media ids and the start_conversation flag).
void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) {
#ifdef USE_MEDIA_PLAYER
  if (this->media_player_ == nullptr) {
    return;
  }

  this->tts_start_trigger_.trigger(msg.text);
  this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;

  if (!msg.preannounce_media_id.empty()) {
    auto preannounce_call = this->media_player_->make_call();
    preannounce_call.set_media_url(msg.preannounce_media_id);
    preannounce_call.set_announcement(true);
    preannounce_call.perform();
  }

  // Enqueueing a URL with an empty playlist will still play the file immediately
  auto announce_call = this->media_player_->make_call();
  announce_call.set_command(media_player::MEDIA_PLAYER_COMMAND_ENQUEUE);
  announce_call.set_media_url(msg.media_id);
  announce_call.set_announcement(true);
  announce_call.perform();

  this->continue_conversation_ = msg.start_conversation;
  this->start_playback_timeout_();

  // In continuous mode the state machine goes through STOP_MICROPHONE before streaming the response
  const State entry_state = this->continuous_ ? State::STOP_MICROPHONE : State::STREAMING_RESPONSE;
  this->set_state_(entry_state, State::STREAMING_RESPONSE);

  this->tts_end_trigger_.trigger(msg.media_id);
  this->end_trigger_.trigger();
#endif
}


/// Apply a new set of active wake words.
///
/// Only has an effect when USE_MICRO_WAKE_WORD is defined and a microWakeWord component is attached:
/// every model is disabled first, then each model whose id appears in `active_wake_words` is
/// enabled again (and logged). Ids that match no model are ignored.
///
/// @param active_wake_words Ids of the wake word models that should be enabled.
void VoiceAssistant::on_set_configuration(const std::vector<std::string> &active_wake_words) {
#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_ == nullptr) {
    return;
  }

  // get_wake_words() hands back its vector by value, so fetch it once instead of once per requested id.
  // Disabled models must still be part of this list, otherwise the enable pass below could never find them.
  const auto models = this->micro_wake_word_->get_wake_words();

  // Disable all wake words first
  for (auto *model : models) {
    model->disable();
  }

  // Enable only active wake words
  for (const auto &ww_id : active_wake_words) {
    for (auto *model : models) {
      if (model->get_id() == ww_id) {
        model->enable();
        ESP_LOGD(TAG, "Enabled wake word: %s (id=%s)", model->get_wake_word().c_str(), model->get_id().c_str());
      }
    }
  }
#endif
}


/// Rebuild and return the wake word configuration.
///
/// The available/active wake word lists are cleared and repopulated on every call. With a
/// microWakeWord component attached, each of its models is listed as available (id, phrase and
/// trained languages), enabled models are additionally listed as active and
/// `max_active_wake_words` is 1; otherwise the lists stay empty and `max_active_wake_words` is 0.
///
/// @return Reference to the internally stored configuration.
const Configuration &VoiceAssistant::get_configuration() {
  auto &cfg = this->config_;
  cfg.available_wake_words.clear();
  cfg.active_wake_words.clear();

  // No microWakeWord unless the section below finds one
  cfg.max_active_wake_words = 0;

#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_) {
    cfg.max_active_wake_words = 1;

    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      WakeWord entry;
      entry.id = model->get_id();
      entry.wake_word = model->get_wake_word();
      for (const auto &lang : model->get_trained_languages()) {
        entry.trained_languages.push_back(lang);
      }
      cfg.available_wake_words.push_back(std::move(entry));

      if (model->is_enabled()) {
        cfg.active_wake_words.push_back(model->get_id());
      }
    }
  }
#endif

  return cfg;
}


// Global pointer to the VoiceAssistant instance. Starts out null and is set to `this` by the
// VoiceAssistant constructor.
VoiceAssistant *global_voice_assistant = nullptr;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)


}  // namespace voice_assistant


}  // namespace esphome


#endif  // USE_VOICE_ASSISTANT

esphome::Component::mark_failed
void mark_failed()
Mark this component as failed.
Definition component.cpp:293

esphome::Component::defer
ESPDEPRECATED("Use const char* overload instead. Removed in 2026.7.0", "2026.1.0") void defer(const std::string &name, std::function< void()> &&f); void defer(const char *name, std::function< void()> &&f)
Defer a callback to the next loop() call.
Definition component.h:493

esphome::Component::status_set_error
void status_set_error()
Definition component.cpp:413

esphome::Component::set_timeout
ESPDEPRECATED("Use const char* or uint32_t overload instead. Removed in 2026.7.0", "2026.1.0") void set_timeout(const std::string &name, uint32_t timeout, std::function< void()> &&f); void set_timeout(const char *name, uint32_t timeout, std::function< void()> &&f)
Set a timeout function with a unique name.
Definition component.h:443

esphome::Component::status_clear_error
void status_clear_error()
Definition component.cpp:439

esphome::Component::set_interval
ESPDEPRECATED("Use const char* or uint32_t overload instead. Removed in 2026.7.0", "2026.1.0") void set_interval(const std::string &name, uint32_t interval, std::function< void()> &&f); void set_interval(const char *name, uint32_t interval, std::function< void()> &&f)
Set an interval function with a unique name.
Definition component.h:350

esphome::Component::status_has_error
bool status_has_error() const
Definition component.cpp:392

esphome::Component::cancel_timeout
ESPDEPRECATED("Use const char* or uint32_t overload instead. Removed in 2026.7.0", "2026.1.0") bool cancel_timeout(const std::string &name); bool cancel_timeout(const char *name)
Cancel a timeout function.
Definition component.h:465

esphome::Component::cancel_interval
ESPDEPRECATED("Use const char* or uint32_t overload instead. Removed in 2026.7.0", "2026.1.0") bool cancel_interval(const std::string &name); bool cancel_interval(const char *name)
Cancel an interval function.
Definition component.h:372

esphome::RAMAllocator
An STL allocator that uses SPI or internal RAM.
Definition helpers.h:1794

esphome::RAMAllocator::deallocate
void deallocate(T *p, size_t n)
Definition helpers.h:1849

esphome::RAMAllocator::allocate
T * allocate(size_t n)
Definition helpers.h:1811

esphome::RingBuffer::create
static std::unique_ptr< RingBuffer > create(size_t len)
Definition ring_buffer.cpp:22

esphome::StringRef
StringRef is a reference to a string owned by something else.
Definition string_ref.h:26

esphome::StringRef::empty
constexpr bool empty() const
Definition string_ref.h:76

esphome::Trigger::trigger
void trigger(const Ts &...x)
Inform the parent automation that the event has triggered.
Definition automation.h:325

esphome::api::APIConnection
Definition api_connection.h:35

esphome::api::APIConnection::get_peername_to
const char * get_peername_to(std::span< char, socket::SOCKADDR_STR_LEN > buf) const
Get peer name (IP address) into caller-provided buffer, returns buf for convenience.
Definition api_connection.h:284

esphome::api::APIConnection::get_name
const char * get_name() const
Definition api_connection.h:282

esphome::api::APIServerConnectionBase::send_message
bool send_message(const ProtoMessage &msg, uint8_t message_type)
Definition api_pb2_service.h:22

esphome::api::VoiceAssistantAnnounceFinished
Definition api_pb2.h:2448

esphome::api::VoiceAssistantAnnounceFinished::success
bool success
Definition api_pb2.h:2455

esphome::api::VoiceAssistantAnnounceFinished::MESSAGE_TYPE
static constexpr uint8_t MESSAGE_TYPE
Definition api_pb2.h:2450

esphome::api::VoiceAssistantAnnounceRequest
Definition api_pb2.h:2429

esphome::api::VoiceAssistantAnnounceRequest::start_conversation
bool start_conversation
Definition api_pb2.h:2439

esphome::api::VoiceAssistantAnnounceRequest::text
StringRef text
Definition api_pb2.h:2437

esphome::api::VoiceAssistantAnnounceRequest::preannounce_media_id
StringRef preannounce_media_id
Definition api_pb2.h:2438

esphome::api::VoiceAssistantAnnounceRequest::media_id
StringRef media_id
Definition api_pb2.h:2436

esphome::api::VoiceAssistantAudio
Definition api_pb2.h:2388

esphome::api::VoiceAssistantAudio::data
const uint8_t * data
Definition api_pb2.h:2395

esphome::api::VoiceAssistantAudio::data_len
uint16_t data_len
Definition api_pb2.h:2396

esphome::api::VoiceAssistantAudio::MESSAGE_TYPE
static constexpr uint8_t MESSAGE_TYPE
Definition api_pb2.h:2390

esphome::api::VoiceAssistantAudioSettings
Definition api_pb2.h:2311

esphome::api::VoiceAssistantAudioSettings::auto_gain
uint32_t auto_gain
Definition api_pb2.h:2314

esphome::api::VoiceAssistantAudioSettings::noise_suppression_level
uint32_t noise_suppression_level
Definition api_pb2.h:2313

esphome::api::VoiceAssistantAudioSettings::volume_multiplier
float volume_multiplier
Definition api_pb2.h:2315

esphome::api::VoiceAssistantEventResponse
Definition api_pb2.h:2371

esphome::api::VoiceAssistantEventResponse::event_type
enums::VoiceAssistantEvent event_type
Definition api_pb2.h:2378

esphome::api::VoiceAssistantEventResponse::data
std::vector< VoiceAssistantEventData > data
Definition api_pb2.h:2379

esphome::api::VoiceAssistantRequest
Definition api_pb2.h:2324

esphome::api::VoiceAssistantRequest::conversation_id
StringRef conversation_id
Definition api_pb2.h:2332

esphome::api::VoiceAssistantRequest::flags
uint32_t flags
Definition api_pb2.h:2333

esphome::api::VoiceAssistantRequest::start
bool start
Definition api_pb2.h:2331

esphome::api::VoiceAssistantRequest::wake_word_phrase
StringRef wake_word_phrase
Definition api_pb2.h:2335

esphome::api::VoiceAssistantRequest::MESSAGE_TYPE
static constexpr uint8_t MESSAGE_TYPE
Definition api_pb2.h:2326

esphome::api::VoiceAssistantRequest::audio_settings
VoiceAssistantAudioSettings audio_settings
Definition api_pb2.h:2334

esphome::api::VoiceAssistantTimerEventResponse
Definition api_pb2.h:2408

esphome::api::VoiceAssistantTimerEventResponse::name
StringRef name
Definition api_pb2.h:2417

esphome::api::VoiceAssistantTimerEventResponse::event_type
enums::VoiceAssistantTimerEvent event_type
Definition api_pb2.h:2415

esphome::api::VoiceAssistantTimerEventResponse::total_seconds
uint32_t total_seconds
Definition api_pb2.h:2418

esphome::api::VoiceAssistantTimerEventResponse::timer_id
StringRef timer_id
Definition api_pb2.h:2416

esphome::api::VoiceAssistantTimerEventResponse::seconds_left
uint32_t seconds_left
Definition api_pb2.h:2419

esphome::api::VoiceAssistantTimerEventResponse::is_active
bool is_active
Definition api_pb2.h:2420

esphome::media_player::MediaPlayerCall::set_media_url
MediaPlayerCall & set_media_url(const std::string &url)
Definition media_player.cpp:186

esphome::media_player::MediaPlayerCall::perform
void perform()
Definition media_player.cpp:112

esphome::media_player::MediaPlayerCall::set_announcement
MediaPlayerCall & set_announcement(bool announce)
Definition media_player.cpp:196

esphome::media_player::MediaPlayerCall::set_command
MediaPlayerCall & set_command(MediaPlayerCommand command)
Definition media_player.cpp:131

esphome::media_player::MediaPlayer::state
MediaPlayerState state
Definition media_player.h:151

esphome::media_player::MediaPlayer::add_on_state_callback
void add_on_state_callback(std::function< void()> &&callback)
Definition media_player.cpp:201

esphome::media_player::MediaPlayer::make_call
MediaPlayerCall make_call()
Definition media_player.h:154

esphome::micro_wake_word::MicroWakeWord::get_wake_words
std::vector< WakeWordModel * > get_wake_words()
Definition micro_wake_word.cpp:211

esphome::microphone::MicrophoneSource::is_stopped
bool is_stopped() const
Definition microphone_source.h:64

esphome::microphone::MicrophoneSource::is_running
bool is_running() const
Definition microphone_source.h:63

esphome::microphone::MicrophoneSource::stop
void stop()
Definition microphone_source.cpp:39

esphome::microphone::MicrophoneSource::start
void start()
Definition microphone_source.cpp:32

esphome::microphone::MicrophoneSource::add_data_callback
void add_data_callback(std::function< void(const std::vector< uint8_t > &)> &&data_callback)
Definition microphone_source.cpp:9

esphome::speaker::Speaker::play
virtual size_t play(const uint8_t *data, size_t length)=0
Plays the provided audio data.

esphome::speaker::Speaker::is_running
bool is_running() const
Definition speaker.h:66

esphome::speaker::Speaker::has_buffered_data
virtual bool has_buffered_data() const =0

esphome::speaker::Speaker::start
virtual void start()=0

esphome::speaker::Speaker::stop
virtual void stop()=0

esphome::voice_assistant::VoiceAssistant
Definition voice_assistant.h:112

esphome::voice_assistant::VoiceAssistant::intent_end_trigger_
Trigger intent_end_trigger_
Definition voice_assistant.h:245

esphome::voice_assistant::VoiceAssistant::socket_
std::unique_ptr< socket::Socket > socket_
Definition voice_assistant.h:242

esphome::voice_assistant::VoiceAssistant::VoiceAssistant
VoiceAssistant()
Definition voice_assistant.cpp:30

esphome::voice_assistant::VoiceAssistant::get_configuration
const Configuration & get_configuration()
Definition voice_assistant.cpp:975

esphome::voice_assistant::VoiceAssistant::local_output_
bool local_output_
Definition voice_assistant.h:298

esphome::voice_assistant::VoiceAssistant::speaker_buffer_size_
size_t speaker_buffer_size_
Definition voice_assistant.h:285

esphome::voice_assistant::VoiceAssistant::started_streaming_tts_
bool started_streaming_tts_
Definition voice_assistant.h:293

esphome::voice_assistant::VoiceAssistant::tts_response_url_
std::string tts_response_url_
Definition voice_assistant.h:292

esphome::voice_assistant::VoiceAssistant::start_udp_socket_
bool start_udp_socket_()
Definition voice_assistant.cpp:64

esphome::voice_assistant::VoiceAssistant::wait_for_stream_end_
bool wait_for_stream_end_
Definition voice_assistant.h:287

esphome::voice_assistant::VoiceAssistant::speaker_buffer_index_
size_t speaker_buffer_index_
Definition voice_assistant.h:284

esphome::voice_assistant::VoiceAssistant::udp_socket_running_
bool udp_socket_running_
Definition voice_assistant.h:323

esphome::voice_assistant::VoiceAssistant::intent_start_trigger_
Trigger intent_start_trigger_
Definition voice_assistant.h:246

esphome::voice_assistant::VoiceAssistant::wake_word_
std::string wake_word_
Definition voice_assistant.h:302

esphome::voice_assistant::VoiceAssistant::on_timer_event
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Definition voice_assistant.cpp:863

esphome::voice_assistant::VoiceAssistant::start_streaming
void start_streaming()
Definition voice_assistant.cpp:500

esphome::voice_assistant::VoiceAssistant::signal_stop_
void signal_stop_()
Definition voice_assistant.cpp:607

esphome::voice_assistant::VoiceAssistant::tts_stream_start_trigger_
Trigger tts_stream_start_trigger_
Definition voice_assistant.h:253

esphome::voice_assistant::VoiceAssistant::client_connected_trigger_
Trigger client_connected_trigger_
Definition voice_assistant.h:264

esphome::voice_assistant::VoiceAssistant::start_trigger_
Trigger start_trigger_
Definition voice_assistant.h:249

esphome::voice_assistant::VoiceAssistant::timer_cancelled_trigger_
Trigger< Timer > timer_cancelled_trigger_
Definition voice_assistant.h:274

esphome::voice_assistant::VoiceAssistant::timers_
std::vector< Timer > timers_
Definition voice_assistant.h:269

esphome::voice_assistant::VoiceAssistant::state_
State state_
Definition voice_assistant.h:319

esphome::voice_assistant::VoiceAssistant::on_audio
void on_audio(const api::VoiceAssistantAudio &msg)
Definition voice_assistant.cpp:847

esphome::voice_assistant::VoiceAssistant::loop
void loop() override
Definition voice_assistant.cpp:184

esphome::voice_assistant::VoiceAssistant::volume_multiplier_
float volume_multiplier_
Definition voice_assistant.h:309

esphome::voice_assistant::VoiceAssistant::media_player_
media_player::MediaPlayer * media_player_
Definition voice_assistant.h:291

esphome::voice_assistant::VoiceAssistant::use_wake_word_
bool use_wake_word_
Definition voice_assistant.h:306

esphome::voice_assistant::VoiceAssistant::get_setup_priority
float get_setup_priority() const override
Definition voice_assistant.cpp:62

esphome::voice_assistant::VoiceAssistant::audio_mode_
AudioMode audio_mode_
Definition voice_assistant.h:322

esphome::voice_assistant::VoiceAssistant::wake_word_detected_trigger_
Trigger wake_word_detected_trigger_
Definition voice_assistant.h:257

esphome::voice_assistant::VoiceAssistant::set_state_
void set_state_(State state)
Definition voice_assistant.cpp:481

esphome::voice_assistant::VoiceAssistant::speaker_buffer_
uint8_t * speaker_buffer_
Definition voice_assistant.h:283

esphome::voice_assistant::VoiceAssistant::stt_vad_end_trigger_
Trigger stt_vad_end_trigger_
Definition voice_assistant.h:251

esphome::voice_assistant::VoiceAssistant::client_subscription
void client_subscription(api::APIConnection *client, bool subscribe)
Definition voice_assistant.cpp:421

esphome::voice_assistant::VoiceAssistant::timer_started_trigger_
Trigger< Timer > timer_started_trigger_
Definition voice_assistant.h:271

esphome::voice_assistant::VoiceAssistant::media_player_response_state_
MediaPlayerResponseState media_player_response_state_
Definition voice_assistant.h:295

esphome::voice_assistant::VoiceAssistant::deallocate_buffers_
void deallocate_buffers_()
Definition voice_assistant.cpp:159

esphome::voice_assistant::VoiceAssistant::silence_detection_
bool silence_detection_
Definition voice_assistant.h:315

esphome::voice_assistant::VoiceAssistant::listening_trigger_
Trigger listening_trigger_
Definition voice_assistant.h:247

esphome::voice_assistant::VoiceAssistant::clear_buffers_
void clear_buffers_()
Definition voice_assistant.cpp:139

esphome::voice_assistant::VoiceAssistant::setup
void setup() override
Definition voice_assistant.cpp:32

esphome::voice_assistant::VoiceAssistant::allocate_buffers_
bool allocate_buffers_()
Definition voice_assistant.cpp:107

esphome::voice_assistant::VoiceAssistant::ring_buffer_
std::shared_ptr< RingBuffer > ring_buffer_
Definition voice_assistant.h:304

esphome::voice_assistant::VoiceAssistant::noise_suppression_level_
uint8_t noise_suppression_level_
Definition voice_assistant.h:307

esphome::voice_assistant::VoiceAssistant::on_event
void on_event(const api::VoiceAssistantEventResponse &msg)
Definition voice_assistant.cpp:629

esphome::voice_assistant::VoiceAssistant::error_trigger_
Trigger< std::string, std::string > error_trigger_
Definition voice_assistant.h:261

esphome::voice_assistant::VoiceAssistant::stt_end_trigger_
Trigger< std::string > stt_end_trigger_
Definition voice_assistant.h:258

esphome::voice_assistant::VoiceAssistant::conversation_timeout_
uint32_t conversation_timeout_
Definition voice_assistant.h:310

esphome::voice_assistant::VoiceAssistant::timer_tick_trigger_
Trigger< const std::vector< Timer > & > timer_tick_trigger_
Definition voice_assistant.h:275

esphome::voice_assistant::VoiceAssistant::write_speaker_
void write_speaker_()
Definition voice_assistant.cpp:403

esphome::voice_assistant::VoiceAssistant::intent_progress_trigger_
Trigger< std::string > intent_progress_trigger_
Definition voice_assistant.h:256

esphome::voice_assistant::VoiceAssistant::on_announce
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
Definition voice_assistant.cpp:922

esphome::voice_assistant::VoiceAssistant::request_start
void request_start(bool continuous, bool silence_detection)
Definition voice_assistant.cpp:546

esphome::voice_assistant::VoiceAssistant::tts_start_trigger_
Trigger< std::string > tts_start_trigger_
Definition voice_assistant.h:260

esphome::voice_assistant::VoiceAssistant::auto_gain_
uint8_t auto_gain_
Definition voice_assistant.h:308

esphome::voice_assistant::VoiceAssistant::api_client_
api::APIConnection * api_client_
Definition voice_assistant.h:267

esphome::voice_assistant::VoiceAssistant::request_stop
void request_stop()
Definition voice_assistant.cpp:561

esphome::voice_assistant::VoiceAssistant::config_
Configuration config_
Definition voice_assistant.h:326

esphome::voice_assistant::VoiceAssistant::tts_end_trigger_
Trigger< std::string > tts_end_trigger_
Definition voice_assistant.h:259

esphome::voice_assistant::VoiceAssistant::dest_addr_
struct sockaddr_storage dest_addr_
Definition voice_assistant.h:243

esphome::voice_assistant::VoiceAssistant::continuous_
bool continuous_
Definition voice_assistant.h:314

esphome::voice_assistant::VoiceAssistant::timer_tick_
void timer_tick_()
Definition voice_assistant.cpp:913

esphome::voice_assistant::VoiceAssistant::speaker_
speaker::Speaker * speaker_
Definition voice_assistant.h:282

esphome::voice_assistant::VoiceAssistant::timer_updated_trigger_
Trigger< Timer > timer_updated_trigger_
Definition voice_assistant.h:273

esphome::voice_assistant::VoiceAssistant::stream_ended_
bool stream_ended_
Definition voice_assistant.h:288

esphome::voice_assistant::VoiceAssistant::send_buffer_
uint8_t * send_buffer_
Definition voice_assistant.h:312

esphome::voice_assistant::VoiceAssistant::end_trigger_
Trigger end_trigger_
Definition voice_assistant.h:248

esphome::voice_assistant::VoiceAssistant::tts_stream_end_trigger_
Trigger tts_stream_end_trigger_
Definition voice_assistant.h:254

esphome::voice_assistant::VoiceAssistant::stt_vad_start_trigger_
Trigger stt_vad_start_trigger_
Definition voice_assistant.h:250

esphome::voice_assistant::VoiceAssistant::start_playback_timeout_
void start_playback_timeout_()
Definition voice_assistant.cpp:618

esphome::voice_assistant::VoiceAssistant::reset_conversation_id
void reset_conversation_id()
Definition voice_assistant.cpp:179

esphome::voice_assistant::VoiceAssistant::conversation_id_
std::string conversation_id_
Definition voice_assistant.h:300

esphome::voice_assistant::VoiceAssistant::mic_source_
microphone::MicrophoneSource * mic_source_
Definition voice_assistant.h:279

esphome::voice_assistant::VoiceAssistant::micro_wake_word_
micro_wake_word::MicroWakeWord * micro_wake_word_
Definition voice_assistant.h:329

esphome::voice_assistant::VoiceAssistant::speaker_bytes_received_
size_t speaker_bytes_received_
Definition voice_assistant.h:286

esphome::voice_assistant::VoiceAssistant::continue_conversation_
bool continue_conversation_
Definition voice_assistant.h:317

esphome::voice_assistant::VoiceAssistant::failed_to_start
void failed_to_start()
Definition voice_assistant.cpp:494

esphome::voice_assistant::VoiceAssistant::on_set_configuration
void on_set_configuration(const std::vector< std::string > &active_wake_words)
Definition voice_assistant.cpp:954

esphome::voice_assistant::VoiceAssistant::timer_finished_trigger_
Trigger< Timer > timer_finished_trigger_
Definition voice_assistant.h:272

esphome::voice_assistant::VoiceAssistant::idle_trigger_
Trigger idle_trigger_
Definition voice_assistant.h:262

esphome::voice_assistant::VoiceAssistant::timer_tick_running_
bool timer_tick_running_
Definition voice_assistant.h:277

esphome::voice_assistant::VoiceAssistant::client_disconnected_trigger_
Trigger client_disconnected_trigger_
Definition voice_assistant.h:265

esphome::voice_assistant::VoiceAssistant::desired_state_
State desired_state_
Definition voice_assistant.h:320

message
const char * message
Definition component.cpp:38

defines.h

flags
uint16_t flags
Definition dns_server_esp32_idf.cpp:1

state
bool state
Definition fan.h:2

socklen_t
uint32_t socklen_t
Definition headers.h:97

ssize_t
__int64 ssize_t
Definition httplib.h:178

log.h

esphome::api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD
@ VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD
Definition api_pb2.h:242

esphome::api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD
@ VOICE_ASSISTANT_REQUEST_USE_VAD
Definition api_pb2.h:241

esphome::api::enums::VOICE_ASSISTANT_TIMER_UPDATED
@ VOICE_ASSISTANT_TIMER_UPDATED
Definition api_pb2.h:265

esphome::api::enums::VOICE_ASSISTANT_TIMER_STARTED
@ VOICE_ASSISTANT_TIMER_STARTED
Definition api_pb2.h:264

esphome::api::enums::VOICE_ASSISTANT_TIMER_FINISHED
@ VOICE_ASSISTANT_TIMER_FINISHED
Definition api_pb2.h:267

esphome::api::enums::VOICE_ASSISTANT_TIMER_CANCELLED
@ VOICE_ASSISTANT_TIMER_CANCELLED
Definition api_pb2.h:266

esphome::api::enums::VOICE_ASSISTANT_INTENT_END
@ VOICE_ASSISTANT_INTENT_END
Definition api_pb2.h:252

esphome::api::enums::VOICE_ASSISTANT_RUN_START
@ VOICE_ASSISTANT_RUN_START
Definition api_pb2.h:247

esphome::api::enums::VOICE_ASSISTANT_TTS_END
@ VOICE_ASSISTANT_TTS_END
Definition api_pb2.h:254

esphome::api::enums::VOICE_ASSISTANT_RUN_END
@ VOICE_ASSISTANT_RUN_END
Definition api_pb2.h:248

esphome::api::enums::VOICE_ASSISTANT_WAKE_WORD_START
@ VOICE_ASSISTANT_WAKE_WORD_START
Definition api_pb2.h:255

esphome::api::enums::VOICE_ASSISTANT_TTS_STREAM_END
@ VOICE_ASSISTANT_TTS_STREAM_END
Definition api_pb2.h:260

esphome::api::enums::VOICE_ASSISTANT_STT_END
@ VOICE_ASSISTANT_STT_END
Definition api_pb2.h:250

esphome::api::enums::VOICE_ASSISTANT_STT_VAD_START
@ VOICE_ASSISTANT_STT_VAD_START
Definition api_pb2.h:257

esphome::api::enums::VOICE_ASSISTANT_INTENT_PROGRESS
@ VOICE_ASSISTANT_INTENT_PROGRESS
Definition api_pb2.h:261

esphome::api::enums::VOICE_ASSISTANT_TTS_START
@ VOICE_ASSISTANT_TTS_START
Definition api_pb2.h:253

esphome::api::enums::VOICE_ASSISTANT_STT_START
@ VOICE_ASSISTANT_STT_START
Definition api_pb2.h:249

esphome::api::enums::VOICE_ASSISTANT_INTENT_START
@ VOICE_ASSISTANT_INTENT_START
Definition api_pb2.h:251

esphome::api::enums::VOICE_ASSISTANT_TTS_STREAM_START
@ VOICE_ASSISTANT_TTS_STREAM_START
Definition api_pb2.h:259

esphome::api::enums::VOICE_ASSISTANT_ERROR
@ VOICE_ASSISTANT_ERROR
Definition api_pb2.h:246

esphome::api::enums::VOICE_ASSISTANT_STT_VAD_END
@ VOICE_ASSISTANT_STT_VAD_END
Definition api_pb2.h:258

esphome::api::enums::VOICE_ASSISTANT_WAKE_WORD_END
@ VOICE_ASSISTANT_WAKE_WORD_END
Definition api_pb2.h:256

esphome::media_player::MEDIA_PLAYER_STATE_ANNOUNCING
@ MEDIA_PLAYER_STATE_ANNOUNCING
Definition media_player.h:40

esphome::media_player::MEDIA_PLAYER_COMMAND_ENQUEUE
@ MEDIA_PLAYER_COMMAND_ENQUEUE
Definition media_player.h:55

esphome::media_player::MEDIA_PLAYER_COMMAND_STOP
@ MEDIA_PLAYER_COMMAND_STOP
Definition media_player.h:49

esphome::setup_priority::AFTER_CONNECTION
constexpr float AFTER_CONNECTION
For components that should be initialized after a data connection (API/MQTT) is connected.
Definition component.h:43

esphome::socket::socket
std::unique_ptr< Socket > socket(int domain, int type, int protocol)
Create a socket of the given domain, type and protocol.
Definition bsd_sockets_impl.cpp:81

esphome::socket::set_sockaddr_any
socklen_t set_sockaddr_any(struct sockaddr *addr, socklen_t addrlen, uint16_t port)
Set a sockaddr to the any address and specified port for the IP version used by socket_ip().
Definition socket.cpp:139

esphome::voice_assistant::MediaPlayerResponseState::FINISHED
@ FINISHED

esphome::voice_assistant::MediaPlayerResponseState::PLAYING
@ PLAYING

esphome::voice_assistant::MediaPlayerResponseState::IDLE
@ IDLE

esphome::voice_assistant::MediaPlayerResponseState::URL_SENT
@ URL_SENT

esphome::voice_assistant::AUDIO_MODE_UDP
@ AUDIO_MODE_UDP
Definition voice_assistant.h:63

esphome::voice_assistant::AUDIO_MODE_API
@ AUDIO_MODE_API
Definition voice_assistant.h:64

esphome::voice_assistant::State
State
Definition voice_assistant.h:46

esphome::voice_assistant::State::STREAMING_RESPONSE
@ STREAMING_RESPONSE

esphome::voice_assistant::State::STOPPING_MICROPHONE
@ STOPPING_MICROPHONE

esphome::voice_assistant::State::STOP_MICROPHONE
@ STOP_MICROPHONE

esphome::voice_assistant::State::WAIT_FOR_VAD
@ WAIT_FOR_VAD

esphome::voice_assistant::State::WAITING_FOR_VAD
@ WAITING_FOR_VAD

esphome::voice_assistant::State::STARTING_PIPELINE
@ STARTING_PIPELINE

esphome::voice_assistant::State::IDLE
@ IDLE

esphome::voice_assistant::State::STREAMING_MICROPHONE
@ STREAMING_MICROPHONE

esphome::voice_assistant::State::AWAITING_RESPONSE
@ AWAITING_RESPONSE

esphome::voice_assistant::State::STARTING_MICROPHONE
@ STARTING_MICROPHONE

esphome::voice_assistant::State::START_PIPELINE
@ START_PIPELINE

esphome::voice_assistant::State::RESPONSE_FINISHED
@ RESPONSE_FINISHED

esphome::voice_assistant::State::START_MICROPHONE
@ START_MICROPHONE

esphome::voice_assistant::global_voice_assistant
VoiceAssistant * global_voice_assistant
Definition voice_assistant.cpp:1007

esphome
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7

esphome::written
int written
Definition helpers.h:861

socket.h

esphome::voice_assistant::Configuration
Definition voice_assistant.h:97

esphome::voice_assistant::Configuration::available_wake_words
std::vector< WakeWord > available_wake_words
Definition voice_assistant.h:98

esphome::voice_assistant::Configuration::active_wake_words
std::vector< std::string > active_wake_words
Definition voice_assistant.h:99

esphome::voice_assistant::Configuration::max_active_wake_words
uint32_t max_active_wake_words
Definition voice_assistant.h:100

esphome::voice_assistant::Timer::TO_STR_BUFFER_SIZE
static constexpr size_t TO_STR_BUFFER_SIZE
Buffer size for to_str() - sufficient for typical timer names.
Definition voice_assistant.h:75

esphome::voice_assistant::WakeWord
Definition voice_assistant.h:91

esphome::voice_assistant::WakeWord::wake_word
std::string wake_word
Definition voice_assistant.h:93

esphome::voice_assistant::WakeWord::trained_languages
std::vector< std::string > trained_languages
Definition voice_assistant.h:94

esphome::voice_assistant::WakeWord::id
std::string id
Definition voice_assistant.h:92

sockaddr_in6
Definition headers.h:72

sockaddr_in
Definition headers.h:61

sockaddr_storage
Definition headers.h:90

sockaddr_storage::ss_family
sa_family_t ss_family
Definition headers.h:92

sockaddr
Definition headers.h:83

voice_assistant.h