ESPHome 2026.6.0-dev
Loading...
Searching...
No Matches
micro_wake_word.h
Go to the documentation of this file.
1#pragma once
2
3#ifdef USE_ESP32
4
6#include "streaming_model.h"
7
10
15
16#ifdef USE_OTA_STATE_LISTENER
18#endif
19
20#include <freertos/event_groups.h>
21
22#include <frontend.h>
23#include <frontend_util.h>
24
26
33
35#ifdef USE_OTA_STATE_LISTENER
36 ,
38#endif
39{
40 public:
// Overrides of virtual methods inherited from a base class (the base-class list is not visible
// in this listing). Only the declarations appear here; the bodies are defined elsewhere.
41 void setup() override;
42 void loop() override;
// Const override returning the setup priority as a float.
43 float get_setup_priority() const override;
44 void dump_config() override;
45
46#ifdef USE_OTA_STATE_LISTENER
// Override of the OTA global-state callback; only compiled when USE_OTA_STATE_LISTENER is defined.
// Receives the OTA state, a progress value, an error code and the OTA component that reported it.
// NOTE(review): how the component reacts to each state is defined in the implementation — confirm there.
47 void on_ota_global_state(ota::OTAState state, float progress, uint8_t error, ota::OTAComponent *comp) override;
48#endif
49
// Public start/stop controls; both take no arguments and return nothing.
// NOTE(review): presumably these relate to the pending_start_/pending_stop_ flags declared further
// down — confirm in the implementation.
50 void start();
51 void stop();
52
// Returns true whenever state_ is anything other than State::STOPPED.
53 bool is_running() const { return this->state_ != State::STOPPED; }
54
// Stores the given step size in features_step_size_.
// NOTE(review): the unit of step_size is not shown in this header — confirm against the frontend configuration.
55 void set_features_step_size(uint8_t step_size) { this->features_step_size_ = step_size; }
56
58 this->microphone_source_ = microphone_source;
59 }
60
// Stores the flag in stop_after_detection_.
// NOTE(review): presumably controls whether the component stops once a wake word is detected — confirm in the implementation.
61 void set_stop_after_detection(bool stop_after_detection) { this->stop_after_detection_ = stop_after_detection; }
62
// Stores the flag in task_stack_in_psram_.
// NOTE(review): presumably selects PSRAM for the inference task's stack — confirm where the task is created.
63 void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
64
66
68
69#ifdef USE_MICRO_WAKE_WORD_VAD
// Adds a VAD model; only available when USE_MICRO_WAKE_WORD_VAD is defined.
// NOTE(review): parameter meanings are inferred from their names — model_start presumably points
// at the model data, probability_cutoff is an 8-bit threshold, and sliding_window_size /
// tensor_arena_size are sizes passed to the model. Confirm against the implementation.
70 void add_vad_model(const uint8_t *model_start, uint8_t probability_cutoff, size_t sliding_window_size,
71 size_t tensor_arena_size);
72
73 // Intended for the voice assistant component to fetch VAD status
// Read-only accessor returning the current vad_state_ flag. Declared const, like is_running(),
// so it can also be called through const pointers and references.
74 bool get_vad_state() const { return this->vad_state_; }
75#endif
76
77 // Intended for the voice assistant component to access which wake words are available
78 // Since these are pointers to the WakeWordModel objects, the voice assistant component can enable or disable them
79 std::vector<WakeWordModel *> get_wake_words();
80
81 protected:
85
86 std::weak_ptr<ring_buffer::RingBuffer> ring_buffer_;
87 std::vector<WakeWordModel *> wake_word_models_;
88
89#ifdef USE_MICRO_WAKE_WORD_VAD
90 std::unique_ptr<VADModel> vad_model_;
91 bool vad_state_{false};
92#endif
93
94 bool pending_start_{false};
95 bool pending_stop_{false};
96
98
100
102
103 // Audio frontend handles generating spectrogram features
104 struct FrontendConfig frontend_config_;
105 struct FrontendState frontend_state_;
106
107 // Handles managing the stop/state of the inference task
108 EventGroupHandle_t event_group_;
109
110 // Used to send messages about the models' states to the main loop
111 QueueHandle_t detection_queue_;
112
114
// Static member function taking a single opaque void* argument.
// NOTE(review): presumably the entry point of the inference task, with params carrying the
// component instance — confirm where the task is created.
115 static void inference_task(void *params);
116
// Suspends the inference task.
118 void suspend_task_();
// Resumes the inference task.
120 void resume_task_();
121
// Takes the State value to switch to.
// NOTE(review): presumably updates state_, which is_running() reads — confirm in the implementation.
122 void set_state_(State state);
123
// Generates a spectrogram feature from an input buffer of audio samples.
// audio_buffer holds 16-bit samples and samples_available gives how many can be read;
// features_buffer is declared with PREPROCESSOR_FEATURE_SIZE int8_t elements.
// NOTE(review): processed_samples is an out-pointer, presumably receiving the number of samples
// consumed, and the bool result presumably signals whether a feature was produced — confirm.
132 bool generate_features_(const int16_t *audio_buffer, size_t samples_available,
133 int8_t features_buffer[PREPROCESSOR_FEATURE_SIZE], size_t *processed_samples);
134
138
// Deletes each model's TFLite interpreters and frees tensor arena memory.
140 void unload_models_();
141
// Runs an inference with each model using the new spectrogram features.
// audio_features is declared with PREPROCESSOR_FEATURE_SIZE int8_t elements.
// NOTE(review): the meaning of the bool result is not shown in this header — confirm in the implementation.
145 bool update_model_probabilities_(const int8_t audio_features[PREPROCESSOR_FEATURE_SIZE]);
146};
147
148} // namespace esphome::micro_wake_word
149
150#endif // USE_ESP32
Helper for FreeRTOS static task management.
Definition static_task.h:15
void resume_task_()
Resumes the inference task.
microphone::MicrophoneSource * microphone_source_
void set_task_stack_in_psram(bool task_stack_in_psram)
void process_probabilities_()
Processes any new probabilities for each model.
void set_stop_after_detection(bool stop_after_detection)
std::vector< WakeWordModel * > wake_word_models_
void suspend_task_()
Suspends the inference task.
Trigger< std::string > wake_word_detected_trigger_
void add_wake_word_model(WakeWordModel *model)
bool generate_features_(const int16_t *audio_buffer, size_t samples_available, int8_t features_buffer[PREPROCESSOR_FEATURE_SIZE], size_t *processed_samples)
Generates a spectrogram feature from an input buffer of audio samples.
bool update_model_probabilities_(const int8_t audio_features[PREPROCESSOR_FEATURE_SIZE])
Runs an inference with each model using the new spectrogram features.
std::unique_ptr< VADModel > vad_model_
std::weak_ptr< ring_buffer::RingBuffer > ring_buffer_
void add_vad_model(const uint8_t *model_start, uint8_t probability_cutoff, size_t sliding_window_size, size_t tensor_arena_size)
void set_features_step_size(uint8_t step_size)
void set_microphone_source(microphone::MicrophoneSource *microphone_source)
void unload_models_()
Deletes each model's TFLite interpreters and frees tensor arena memory.
std::vector< WakeWordModel * > get_wake_words()
void on_ota_global_state(ota::OTAState state, float progress, uint8_t error, ota::OTAComponent *comp) override
Trigger< std::string > * get_wake_word_detected_trigger()
Listener interface for global OTA state changes (includes OTA component pointer).
bool state
Definition fan.h:2