ESPHome 2026.5.0-dev
Loading...
Searching...
No Matches
i2s_audio_microphone.cpp
Go to the documentation of this file.
2
3#ifdef USE_ESP32
4
5#include <driver/i2s_std.h>
6#include <driver/i2s_pdm.h>
7
8#include "esphome/core/hal.h"
9#include "esphome/core/log.h"
10
12
13namespace esphome {
14namespace i2s_audio {
15
// Used as both the maximum and the initial count of the counting semaphore that tracks active listeners
16static const UBaseType_t MAX_LISTENERS = 16;
17
// Length of audio, in milliseconds, that the microphone task reads per iteration; also the task's idle delay
18static const uint32_t READ_DURATION_MS = 16;
19
// Stack size and priority handed to xTaskCreate when the microphone task is created
20static const size_t TASK_STACK_SIZE = 4096;
21static const ssize_t TASK_PRIORITY = 23;
22
// Tag passed to the ESP_LOG* macros throughout this file
23static const char *const TAG = "i2s_audio.microphone";
24
26 COMMAND_STOP = (1 << 0), // stops the microphone task, set and cleared by ``loop``
27
28 TASK_STARTING = (1 << 10), // set by mic task, cleared by ``loop``
29 TASK_RUNNING = (1 << 11), // set by mic task, cleared by ``loop``
30 TASK_STOPPED = (1 << 13), // set by mic task, cleared by ``loop``
31
32 ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
33};
34
36 this->active_listeners_semaphore_ = xSemaphoreCreateCounting(MAX_LISTENERS, MAX_LISTENERS);
37 if (this->active_listeners_semaphore_ == nullptr) {
38 ESP_LOGE(TAG, "Creating semaphore failed");
39 this->mark_failed();
40 return;
41 }
42
43 this->event_group_ = xEventGroupCreate();
44 if (this->event_group_ == nullptr) {
45 ESP_LOGE(TAG, "Creating event group failed");
46 this->mark_failed();
47 return;
48 }
49
51}
52
54 ESP_LOGCONFIG(TAG,
55 "Microphone:\n"
56 " Pin: %d\n"
57 " PDM: %s\n"
58 " DC offset correction: %s",
59 static_cast<int8_t>(this->din_pin_), YESNO(this->pdm_), YESNO(this->correct_dc_offset_));
60}
61
63 uint8_t channel_count = 1;
64 uint8_t bits_per_sample = 16;
65 if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO) {
66 bits_per_sample = this->slot_bit_width_;
67 }
68
69 if (this->slot_mode_ == I2S_SLOT_MODE_STEREO) {
70 channel_count = 2;
71 }
72
73#ifdef USE_ESP32_VARIANT_ESP32
74 // ESP32 reads audio aligned to a multiple of 2 bytes. For example, if configured for 24 bits per sample, then it will
75 // produce 32 bits per sample, where the actual data is in the most significant bits. Other ESP32 variants produce 24
76 // bits per sample in this situation.
77 if (bits_per_sample < 16) {
78 bits_per_sample = 16;
79 } else if ((bits_per_sample > 16) && (bits_per_sample <= 32)) {
80 bits_per_sample = 32;
81 }
82#endif
83
84 if (this->pdm_) {
85 bits_per_sample = 16; // PDM mics are always 16 bits per sample
86 }
87
88 this->audio_stream_info_ = audio::AudioStreamInfo(bits_per_sample, channel_count, this->sample_rate_);
89}
90
92 if (this->is_failed())
93 return;
94
95 xSemaphoreTake(this->active_listeners_semaphore_, 0);
96}
97
98bool I2SAudioMicrophone::start_driver_() {
99 if (!this->parent_->try_lock()) {
100 return false; // Waiting for another i2s to return lock
101 }
102 this->locked_driver_ = true;
103 esp_err_t err;
104
105 i2s_chan_config_t chan_cfg = {
106 .id = this->parent_->get_port(),
107 .role = this->i2s_role_,
108 .dma_desc_num = 4,
109 .dma_frame_num = 256,
110 .auto_clear = false,
111 };
112 /* Allocate a new RX channel and get the handle of this channel */
113 err = i2s_new_channel(&chan_cfg, NULL, &this->rx_handle_);
114 if (err != ESP_OK) {
115 ESP_LOGE(TAG, "Error creating channel: %s", esp_err_to_name(err));
116 return false;
117 }
118
119 i2s_clock_src_t clk_src = I2S_CLK_SRC_DEFAULT;
120#ifdef I2S_CLK_SRC_APLL
121 if (this->use_apll_) {
122 clk_src = I2S_CLK_SRC_APLL;
123 }
124#endif
125 i2s_std_gpio_config_t pin_config = this->parent_->get_pin_config();
126#if SOC_I2S_SUPPORTS_PDM_RX
127 if (this->pdm_) {
128 i2s_pdm_rx_clk_config_t clk_cfg = {
129 .sample_rate_hz = this->sample_rate_,
130 .clk_src = clk_src,
131 .mclk_multiple = this->mclk_multiple_,
132 .dn_sample_mode = I2S_PDM_DSR_8S,
133 };
134
135 i2s_pdm_rx_slot_config_t slot_cfg = I2S_PDM_RX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, this->slot_mode_);
136 switch (this->std_slot_mask_) {
137 case I2S_STD_SLOT_LEFT:
138 slot_cfg.slot_mask = I2S_PDM_SLOT_LEFT;
139 break;
140 case I2S_STD_SLOT_RIGHT:
141 slot_cfg.slot_mask = I2S_PDM_SLOT_RIGHT;
142 break;
143 case I2S_STD_SLOT_BOTH:
144 slot_cfg.slot_mask = I2S_PDM_SLOT_BOTH;
145 break;
146 }
147
148 /* Init the channel into PDM RX mode */
149 i2s_pdm_rx_config_t pdm_rx_cfg = {
150 .clk_cfg = clk_cfg,
151 .slot_cfg = slot_cfg,
152 .gpio_cfg =
153 {
154 .clk = pin_config.ws,
155 .din = this->din_pin_,
156 .invert_flags =
157 {
158 .clk_inv = pin_config.invert_flags.ws_inv,
159 },
160 },
161 };
162 err = i2s_channel_init_pdm_rx_mode(this->rx_handle_, &pdm_rx_cfg);
163 } else
164#endif
165 {
166 i2s_std_clk_config_t clk_cfg = {
167 .sample_rate_hz = this->sample_rate_,
168 .clk_src = clk_src,
169 .mclk_multiple = this->mclk_multiple_,
170 };
171 i2s_std_slot_config_t std_slot_cfg =
172 I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG((i2s_data_bit_width_t) this->slot_bit_width_, this->slot_mode_);
173 std_slot_cfg.slot_bit_width = this->slot_bit_width_;
174 std_slot_cfg.slot_mask = this->std_slot_mask_;
175
176 pin_config.din = this->din_pin_;
177
178 i2s_std_config_t std_cfg = {
179 .clk_cfg = clk_cfg,
180 .slot_cfg = std_slot_cfg,
181 .gpio_cfg = pin_config,
182 };
183 /* Initialize the channel */
184 err = i2s_channel_init_std_mode(this->rx_handle_, &std_cfg);
185 }
186 if (err != ESP_OK) {
187 ESP_LOGE(TAG, "Error initializing channel: %s", esp_err_to_name(err));
188 return false;
189 }
190
191 /* Before reading data, start the RX channel first */
192 err = i2s_channel_enable(this->rx_handle_);
193 if (err != ESP_OK) {
194 ESP_LOGE(TAG, "Enabling failed: %s", esp_err_to_name(err));
195 return false;
196 }
197
198 this->configure_stream_settings_(); // redetermine the settings in case some settings were changed after compilation
199
200 return true;
201}
202
204 if (this->state_ == microphone::STATE_STOPPED || this->is_failed())
205 return;
206
207 xSemaphoreGive(this->active_listeners_semaphore_);
208}
209
210void I2SAudioMicrophone::stop_driver_() {
211 // There is no harm continuing to unload the driver if an error is ever returned by the various functions. This
212 // ensures that we stop/unload the driver when it only partially starts.
213
214 esp_err_t err;
215 if (this->rx_handle_ != nullptr) {
216 /* Have to stop the channel before deleting it */
217 err = i2s_channel_disable(this->rx_handle_);
218 if (err != ESP_OK) {
219 ESP_LOGW(TAG, "Error stopping: %s", esp_err_to_name(err));
220 }
221 /* If the handle is not needed any more, delete it to release the channel resources */
222 err = i2s_del_channel(this->rx_handle_);
223 if (err != ESP_OK) {
224 ESP_LOGW(TAG, "Error deleting channel: %s", esp_err_to_name(err));
225 }
226 this->rx_handle_ = nullptr;
227 }
228 if (this->locked_driver_) {
229 this->parent_->unlock();
230 this->locked_driver_ = false;
231 }
232}
233
235 I2SAudioMicrophone *this_microphone = (I2SAudioMicrophone *) params;
236 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_STARTING);
237
238 { // Ensures the samples vector is freed when the task stops
239
240 const size_t bytes_to_read = this_microphone->audio_stream_info_.ms_to_bytes(READ_DURATION_MS);
241 std::vector<uint8_t> samples;
242 samples.reserve(bytes_to_read);
243
244 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_RUNNING);
245
246 while (!(xEventGroupGetBits(this_microphone->event_group_) & MicrophoneEventGroupBits::COMMAND_STOP)) {
247 if (this_microphone->data_callbacks_.size() > 0) {
248 samples.resize(bytes_to_read);
249 size_t bytes_read = this_microphone->read_(samples.data(), bytes_to_read, 2 * pdMS_TO_TICKS(READ_DURATION_MS));
250 samples.resize(bytes_read);
251 if (this_microphone->correct_dc_offset_) {
252 this_microphone->fix_dc_offset_(samples);
253 }
254 this_microphone->data_callbacks_.call(samples);
255 } else {
256 vTaskDelay(pdMS_TO_TICKS(READ_DURATION_MS));
257 }
258 }
259 }
260
261 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_STOPPED);
262 while (true) {
263 // Continuously delay until the loop method deletes the task
264 vTaskDelay(pdMS_TO_TICKS(10));
265 }
266}
267
268void I2SAudioMicrophone::fix_dc_offset_(std::vector<uint8_t> &data) {
308 const uint8_t dc_filter_shift = 10;
309 const size_t bytes_per_sample = this->audio_stream_info_.samples_to_bytes(1);
310 const uint32_t total_samples = this->audio_stream_info_.bytes_to_samples(data.size());
311 for (uint32_t sample_index = 0; sample_index < total_samples; ++sample_index) {
312 const uint32_t byte_index = sample_index * bytes_per_sample;
313 int32_t input = audio::unpack_audio_sample_to_q31(&data[byte_index], bytes_per_sample);
314 int32_t output = input - this->dc_offset_prev_input_ +
315 (this->dc_offset_prev_output_ - (this->dc_offset_prev_output_ >> dc_filter_shift));
316 this->dc_offset_prev_input_ = input;
317 this->dc_offset_prev_output_ = output;
318 audio::pack_q31_as_audio_sample(output, &data[byte_index], bytes_per_sample);
319 }
320}
321
322size_t I2SAudioMicrophone::read_(uint8_t *buf, size_t len, TickType_t ticks_to_wait) {
323 size_t bytes_read = 0;
324 // i2s_channel_read expects the timeout value in ms, not ticks
325 esp_err_t err = i2s_channel_read(this->rx_handle_, buf, len, &bytes_read, pdTICKS_TO_MS(ticks_to_wait));
326 if ((err != ESP_OK) && ((err != ESP_ERR_TIMEOUT) || (ticks_to_wait != 0))) {
327 // Ignore ESP_ERR_TIMEOUT if ticks_to_wait = 0, as it will read the data on the next call
328 if (!this->status_has_warning()) {
329 // Avoid spamming the logs with the error message if its repeated
330 ESP_LOGW(TAG, "Read error: %s", esp_err_to_name(err));
331 }
332 this->status_set_warning();
333 return 0;
334 }
335 if ((bytes_read == 0) && (ticks_to_wait > 0)) {
336 this->status_set_warning();
337 return 0;
338 }
339 this->status_clear_warning();
340#ifdef USE_ESP32_VARIANT_ESP32
341 // For ESP32 16-bit standard mono mode, adjacent samples need to be swapped.
342 if (this->slot_mode_ == I2S_SLOT_MODE_MONO && this->slot_bit_width_ == I2S_SLOT_BIT_WIDTH_16BIT && !this->pdm_) {
343 int16_t *samples = reinterpret_cast<int16_t *>(buf);
344 size_t sample_count = bytes_read / sizeof(int16_t);
345 for (size_t i = 0; i + 1 < sample_count; i += 2) {
346 int16_t tmp = samples[i];
347 samples[i] = samples[i + 1];
348 samples[i + 1] = tmp;
349 }
350 }
351#endif
352 return bytes_read;
353}
354
356 uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
357
358 if (event_group_bits & MicrophoneEventGroupBits::TASK_STARTING) {
359 ESP_LOGV(TAG, "Task started, attempting to allocate buffer");
360 xEventGroupClearBits(this->event_group_, MicrophoneEventGroupBits::TASK_STARTING);
361 }
362
363 if (event_group_bits & MicrophoneEventGroupBits::TASK_RUNNING) {
364 ESP_LOGV(TAG, "Task is running and reading data");
365
366 xEventGroupClearBits(this->event_group_, MicrophoneEventGroupBits::TASK_RUNNING);
368 }
369
370 if ((event_group_bits & MicrophoneEventGroupBits::TASK_STOPPED)) {
371 ESP_LOGV(TAG, "Task finished, freeing resources and uninstalling driver");
372
373 vTaskDelete(this->task_handle_);
374 this->task_handle_ = nullptr;
375 this->stop_driver_();
376 xEventGroupClearBits(this->event_group_, ALL_BITS);
377 this->status_clear_error();
378
380 }
381
382 // Start the microphone if any semaphores are taken
383 if ((uxSemaphoreGetCount(this->active_listeners_semaphore_) < MAX_LISTENERS) &&
386 }
387
388 // Stop the microphone if all semaphores are returned
389 if ((uxSemaphoreGetCount(this->active_listeners_semaphore_) == MAX_LISTENERS) &&
392 }
393
394 switch (this->state_) {
396 if (this->status_has_error()) {
397 break;
398 }
399
400 if (!this->start_driver_()) {
401 ESP_LOGE(TAG, "Driver failed to start; retrying in 1 second");
402 this->status_momentary_error("driver_fail", 1000);
403 this->stop_driver_(); // Stop/frees whatever possibly started
404 break;
405 }
406
407 if (this->task_handle_ == nullptr) {
408 xTaskCreate(I2SAudioMicrophone::mic_task, "mic_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY,
409 &this->task_handle_);
410
411 if (this->task_handle_ == nullptr) {
412 ESP_LOGE(TAG, "Task failed to start, retrying in 1 second");
413 this->status_momentary_error("task_fail", 1000);
414 this->stop_driver_(); // Stops the driver to return the lock; will be reloaded in next attempt
415 }
416 }
417
418 break;
420 break;
422 xEventGroupSetBits(this->event_group_, MicrophoneEventGroupBits::COMMAND_STOP);
423 break;
425 break;
426 }
427}
428
429} // namespace i2s_audio
430} // namespace esphome
431
432#endif // USE_ESP32
void mark_failed()
Mark this component as failed.
void status_momentary_error(const char *name, uint32_t length=5000)
Set error status flag and automatically clear it after a timeout.
bool is_failed() const
Definition component.h:284
void status_clear_error()
Definition component.h:312
bool status_has_warning() const
Definition component.h:290
bool status_has_error() const
Definition component.h:292
void status_clear_warning()
Definition component.h:306
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition audio.h:73
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition audio.h:58
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:48
i2s_std_slot_mask_t std_slot_mask_
Definition i2s_audio.h:29
i2s_slot_bit_width_t slot_bit_width_
Definition i2s_audio.h:30
i2s_mclk_multiple_t mclk_multiple_
Definition i2s_audio.h:33
void configure_stream_settings_()
Starts the I2S driver.
audio::AudioStreamInfo audio_stream_info_
Definition microphone.h:46
CallbackManager< void(const std::vector< uint8_t > &)> data_callbacks_
Definition microphone.h:48
__int64 ssize_t
Definition httplib.h:178
int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample)
Unpacks a quantized audio sample into a Q31 fixed-point number.
Definition audio.h:152
void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample)
Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
Definition audio.h:178
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::string size_t len
Definition helpers.h:1045
static void uint32_t