ESPHome 2026.6.0-dev
Loading...
Searching...
No Matches
i2s_audio_microphone.cpp
Go to the documentation of this file.
2
3#ifdef USE_ESP32
4
5#include <driver/i2s_std.h>
6#include <driver/i2s_pdm.h>
7
8#include "esphome/core/hal.h"
9#include "esphome/core/log.h"
10
12
13namespace esphome::i2s_audio {
14
15static const UBaseType_t MAX_LISTENERS = 16;
16
17static const uint32_t READ_DURATION_MS = 16;
18
19static const size_t TASK_STACK_SIZE = 4096;
20static const ssize_t TASK_PRIORITY = 23;
21
22static const char *const TAG = "i2s_audio.microphone";
23
25 COMMAND_STOP = (1 << 0), // stops the microphone task, set and cleared by ``loop``
26
27 TASK_STARTING = (1 << 10), // set by mic task, cleared by ``loop``
28 TASK_RUNNING = (1 << 11), // set by mic task, cleared by ``loop``
29 TASK_STOPPED = (1 << 13), // set by mic task, cleared by ``loop``
30
31 ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
32};
33
35 this->active_listeners_semaphore_ = xSemaphoreCreateCounting(MAX_LISTENERS, MAX_LISTENERS);
36 if (this->active_listeners_semaphore_ == nullptr) {
37 ESP_LOGE(TAG, "Creating semaphore failed");
38 this->mark_failed();
39 return;
40 }
41
42 this->event_group_ = xEventGroupCreate();
43 if (this->event_group_ == nullptr) {
44 ESP_LOGE(TAG, "Creating event group failed");
45 this->mark_failed();
46 return;
47 }
48
50}
51
53 ESP_LOGCONFIG(TAG,
54 "Microphone:\n"
55 " Pin: %d\n"
56 " PDM: %s\n"
57 " DC offset correction: %s",
58 static_cast<int8_t>(this->din_pin_), YESNO(this->pdm_), YESNO(this->correct_dc_offset_));
59}
60
62 uint8_t channel_count = 1;
63 uint8_t bits_per_sample = 16;
64 if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO) {
65 bits_per_sample = this->slot_bit_width_;
66 }
67
68 if (this->slot_mode_ == I2S_SLOT_MODE_STEREO) {
69 channel_count = 2;
70 }
71
72#ifdef USE_ESP32_VARIANT_ESP32
73 // ESP32 reads audio aligned to a multiple of 2 bytes. For example, if configured for 24 bits per sample, then it will
74 // produce 32 bits per sample, where the actual data is in the most significant bits. Other ESP32 variants produce 24
75 // bits per sample in this situation.
76 if (bits_per_sample < 16) {
77 bits_per_sample = 16;
78 } else if ((bits_per_sample > 16) && (bits_per_sample <= 32)) {
79 bits_per_sample = 32;
80 }
81#endif
82
83 if (this->pdm_) {
84 bits_per_sample = 16; // PDM mics are always 16 bits per sample
85 }
86
87 this->audio_stream_info_ = audio::AudioStreamInfo(bits_per_sample, channel_count, this->sample_rate_);
88}
89
91 if (this->is_failed())
92 return;
93
94 xSemaphoreTake(this->active_listeners_semaphore_, 0);
95}
96
97bool I2SAudioMicrophone::start_driver_() {
98 if (!this->parent_->try_lock()) {
99 return false; // Waiting for another i2s to return lock
100 }
101 this->locked_driver_ = true;
102 esp_err_t err;
103
104 i2s_chan_config_t chan_cfg = {
105 .id = this->parent_->get_port(),
106 .role = this->i2s_role_,
107 .dma_desc_num = 4,
108 .dma_frame_num = 256,
109 .auto_clear = false,
110 };
111 /* Allocate a new RX channel and get the handle of this channel */
112 err = i2s_new_channel(&chan_cfg, NULL, &this->rx_handle_);
113 if (err != ESP_OK) {
114 ESP_LOGE(TAG, "Error creating channel: %s", esp_err_to_name(err));
115 return false;
116 }
117
118 i2s_clock_src_t clk_src = I2S_CLK_SRC_DEFAULT;
119#ifdef I2S_CLK_SRC_APLL
120 if (this->use_apll_) {
121 clk_src = I2S_CLK_SRC_APLL;
122 }
123#endif
124 i2s_std_gpio_config_t pin_config = this->parent_->get_pin_config();
125#if SOC_I2S_SUPPORTS_PDM_RX
126 if (this->pdm_) {
127 i2s_pdm_rx_clk_config_t clk_cfg = {
128 .sample_rate_hz = this->sample_rate_,
129 .clk_src = clk_src,
130 .mclk_multiple = this->mclk_multiple_,
131 .dn_sample_mode = I2S_PDM_DSR_8S,
132 };
133
134 i2s_pdm_rx_slot_config_t slot_cfg = I2S_PDM_RX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, this->slot_mode_);
135 switch (this->std_slot_mask_) {
136 case I2S_STD_SLOT_LEFT:
137 slot_cfg.slot_mask = I2S_PDM_SLOT_LEFT;
138 break;
139 case I2S_STD_SLOT_RIGHT:
140 slot_cfg.slot_mask = I2S_PDM_SLOT_RIGHT;
141 break;
142 case I2S_STD_SLOT_BOTH:
143 slot_cfg.slot_mask = I2S_PDM_SLOT_BOTH;
144 break;
145 }
146
147 /* Init the channel into PDM RX mode */
148 i2s_pdm_rx_config_t pdm_rx_cfg = {
149 .clk_cfg = clk_cfg,
150 .slot_cfg = slot_cfg,
151 .gpio_cfg =
152 {
153 .clk = pin_config.ws,
154 .din = this->din_pin_,
155 .invert_flags =
156 {
157 .clk_inv = pin_config.invert_flags.ws_inv,
158 },
159 },
160 };
161 err = i2s_channel_init_pdm_rx_mode(this->rx_handle_, &pdm_rx_cfg);
162 } else
163#endif
164 {
165 i2s_std_clk_config_t clk_cfg = {
166 .sample_rate_hz = this->sample_rate_,
167 .clk_src = clk_src,
168 .mclk_multiple = this->mclk_multiple_,
169 };
170 i2s_std_slot_config_t std_slot_cfg =
171 I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG((i2s_data_bit_width_t) this->slot_bit_width_, this->slot_mode_);
172 std_slot_cfg.slot_bit_width = this->slot_bit_width_;
173 std_slot_cfg.slot_mask = this->std_slot_mask_;
174
175 pin_config.din = this->din_pin_;
176
177 i2s_std_config_t std_cfg = {
178 .clk_cfg = clk_cfg,
179 .slot_cfg = std_slot_cfg,
180 .gpio_cfg = pin_config,
181 };
182 /* Initialize the channel */
183 err = i2s_channel_init_std_mode(this->rx_handle_, &std_cfg);
184 }
185 if (err != ESP_OK) {
186 ESP_LOGE(TAG, "Error initializing channel: %s", esp_err_to_name(err));
187 return false;
188 }
189
190 /* Before reading data, start the RX channel first */
191 err = i2s_channel_enable(this->rx_handle_);
192 if (err != ESP_OK) {
193 ESP_LOGE(TAG, "Enabling failed: %s", esp_err_to_name(err));
194 return false;
195 }
196
197 this->configure_stream_settings_(); // redetermine the settings in case some settings were changed after compilation
198
199 return true;
200}
201
203 if (this->state_ == microphone::STATE_STOPPED || this->is_failed())
204 return;
205
206 xSemaphoreGive(this->active_listeners_semaphore_);
207}
208
209void I2SAudioMicrophone::stop_driver_() {
210 // There is no harm continuing to unload the driver if an error is ever returned by the various functions. This
211 // ensures that we stop/unload the driver when it only partially starts.
212
213 esp_err_t err;
214 if (this->rx_handle_ != nullptr) {
215 /* Have to stop the channel before deleting it */
216 err = i2s_channel_disable(this->rx_handle_);
217 if (err != ESP_OK) {
218 ESP_LOGW(TAG, "Error stopping: %s", esp_err_to_name(err));
219 }
220 /* If the handle is not needed any more, delete it to release the channel resources */
221 err = i2s_del_channel(this->rx_handle_);
222 if (err != ESP_OK) {
223 ESP_LOGW(TAG, "Error deleting channel: %s", esp_err_to_name(err));
224 }
225 this->rx_handle_ = nullptr;
226 }
227 if (this->locked_driver_) {
228 this->parent_->unlock();
229 this->locked_driver_ = false;
230 }
231}
232
234 I2SAudioMicrophone *this_microphone = (I2SAudioMicrophone *) params;
235 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_STARTING);
236
237 { // Ensures the samples vector is freed when the task stops
238
239 const size_t bytes_to_read = this_microphone->audio_stream_info_.ms_to_bytes(READ_DURATION_MS);
240 std::vector<uint8_t> samples;
241 samples.reserve(bytes_to_read);
242
243 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_RUNNING);
244
245 while (!(xEventGroupGetBits(this_microphone->event_group_) & MicrophoneEventGroupBits::COMMAND_STOP)) {
246 if (this_microphone->data_callbacks_.size() > 0) {
247 samples.resize(bytes_to_read);
248 size_t bytes_read = this_microphone->read_(samples.data(), bytes_to_read, 2 * pdMS_TO_TICKS(READ_DURATION_MS));
249 samples.resize(bytes_read);
250 if (this_microphone->correct_dc_offset_) {
251 this_microphone->fix_dc_offset_(samples);
252 }
253 this_microphone->data_callbacks_.call(samples);
254 } else {
255 vTaskDelay(pdMS_TO_TICKS(READ_DURATION_MS));
256 }
257 }
258 }
259
260 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_STOPPED);
261 while (true) {
262 // Continuously delay until the loop method deletes the task
263 vTaskDelay(pdMS_TO_TICKS(10));
264 }
265}
266
267void I2SAudioMicrophone::fix_dc_offset_(std::vector<uint8_t> &data) {
307 const uint8_t dc_filter_shift = 10;
308 const size_t bytes_per_sample = this->audio_stream_info_.samples_to_bytes(1);
309 const uint32_t total_samples = this->audio_stream_info_.bytes_to_samples(data.size());
310 for (uint32_t sample_index = 0; sample_index < total_samples; ++sample_index) {
311 const uint32_t byte_index = sample_index * bytes_per_sample;
312 int32_t input = audio::unpack_audio_sample_to_q31(&data[byte_index], bytes_per_sample);
313 int32_t output = input - this->dc_offset_prev_input_ +
314 (this->dc_offset_prev_output_ - (this->dc_offset_prev_output_ >> dc_filter_shift));
315 this->dc_offset_prev_input_ = input;
316 this->dc_offset_prev_output_ = output;
317 audio::pack_q31_as_audio_sample(output, &data[byte_index], bytes_per_sample);
318 }
319}
320
321size_t I2SAudioMicrophone::read_(uint8_t *buf, size_t len, TickType_t ticks_to_wait) {
322 size_t bytes_read = 0;
323 // i2s_channel_read expects the timeout value in ms, not ticks
324 esp_err_t err = i2s_channel_read(this->rx_handle_, buf, len, &bytes_read, pdTICKS_TO_MS(ticks_to_wait));
325 if ((err != ESP_OK) && ((err != ESP_ERR_TIMEOUT) || (ticks_to_wait != 0))) {
326 // Ignore ESP_ERR_TIMEOUT if ticks_to_wait = 0, as it will read the data on the next call
327 if (!this->status_has_warning()) {
328 // Avoid spamming the logs with the error message if its repeated
329 ESP_LOGW(TAG, "Read error: %s", esp_err_to_name(err));
330 }
331 this->status_set_warning();
332 return 0;
333 }
334 if ((bytes_read == 0) && (ticks_to_wait > 0)) {
335 this->status_set_warning();
336 return 0;
337 }
338 this->status_clear_warning();
339#ifdef USE_ESP32_VARIANT_ESP32
340 // For ESP32 16-bit standard mono mode, adjacent samples need to be swapped.
341 if (this->slot_mode_ == I2S_SLOT_MODE_MONO && this->slot_bit_width_ == I2S_SLOT_BIT_WIDTH_16BIT && !this->pdm_) {
342 int16_t *samples = reinterpret_cast<int16_t *>(buf);
343 size_t sample_count = bytes_read / sizeof(int16_t);
344 for (size_t i = 0; i + 1 < sample_count; i += 2) {
345 int16_t tmp = samples[i];
346 samples[i] = samples[i + 1];
347 samples[i + 1] = tmp;
348 }
349 }
350#endif
351 return bytes_read;
352}
353
355 uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
356
357 if (event_group_bits & MicrophoneEventGroupBits::TASK_STARTING) {
358 ESP_LOGV(TAG, "Task started, attempting to allocate buffer");
359 xEventGroupClearBits(this->event_group_, MicrophoneEventGroupBits::TASK_STARTING);
360 }
361
362 if (event_group_bits & MicrophoneEventGroupBits::TASK_RUNNING) {
363 ESP_LOGV(TAG, "Task is running and reading data");
364
365 xEventGroupClearBits(this->event_group_, MicrophoneEventGroupBits::TASK_RUNNING);
367 }
368
369 if ((event_group_bits & MicrophoneEventGroupBits::TASK_STOPPED)) {
370 ESP_LOGV(TAG, "Task finished, freeing resources and uninstalling driver");
371
372 vTaskDelete(this->task_handle_);
373 this->task_handle_ = nullptr;
374 this->stop_driver_();
375 xEventGroupClearBits(this->event_group_, ALL_BITS);
376 this->status_clear_error();
377
379 }
380
381 // Start the microphone if any semaphores are taken
382 if ((uxSemaphoreGetCount(this->active_listeners_semaphore_) < MAX_LISTENERS) &&
385 }
386
387 // Stop the microphone if all semaphores are returned
388 if ((uxSemaphoreGetCount(this->active_listeners_semaphore_) == MAX_LISTENERS) &&
391 }
392
393 switch (this->state_) {
395 if (this->status_has_error()) {
396 break;
397 }
398
399 if (!this->start_driver_()) {
400 ESP_LOGE(TAG, "Driver failed to start; retrying in 1 second");
401 this->status_momentary_error("driver_fail", 1000);
402 this->stop_driver_(); // Stop/frees whatever possibly started
403 break;
404 }
405
406 if (this->task_handle_ == nullptr) {
407 xTaskCreate(I2SAudioMicrophone::mic_task, "mic_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY,
408 &this->task_handle_);
409
410 if (this->task_handle_ == nullptr) {
411 ESP_LOGE(TAG, "Task failed to start, retrying in 1 second");
412 this->status_momentary_error("task_fail", 1000);
413 this->stop_driver_(); // Stops the driver to return the lock; will be reloaded in next attempt
414 }
415 }
416
417 break;
419 break;
421 xEventGroupSetBits(this->event_group_, MicrophoneEventGroupBits::COMMAND_STOP);
422 break;
424 break;
425 }
426}
427
428} // namespace esphome::i2s_audio
429
430#endif // USE_ESP32
void mark_failed()
Mark this component as failed.
void status_momentary_error(const char *name, uint32_t length=5000)
Set error status flag and automatically clear it after a timeout.
bool is_failed() const
Definition component.h:272
void status_clear_error()
Definition component.h:295
bool status_has_warning() const
Definition component.h:278
bool status_has_error() const
Definition component.h:280
void status_clear_warning()
Definition component.h:289
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition audio.h:73
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition audio.h:58
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:48
i2s_std_slot_mask_t std_slot_mask_
Definition i2s_audio.h:28
i2s_slot_bit_width_t slot_bit_width_
Definition i2s_audio.h:29
i2s_mclk_multiple_t mclk_multiple_
Definition i2s_audio.h:32
void configure_stream_settings_()
Starts the I2S driver.
audio::AudioStreamInfo audio_stream_info_
Definition microphone.h:45
CallbackManager< void(const std::vector< uint8_t > &)> data_callbacks_
Definition microphone.h:47
__int64 ssize_t
Definition httplib.h:178
int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample)
Unpacks a quantized audio sample into a Q31 fixed-point number.
Definition audio.h:156
void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample)
Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
Definition audio.h:182
const void size_t len
Definition hal.h:64
static void uint32_t