ESPHome 2026.6.0-dev
Loading...
Searching...
No Matches
audio_decoder.cpp
Go to the documentation of this file.
1#include "audio_decoder.h"
2
3#ifdef USE_ESP32
4
5#include "esphome/core/hal.h"
6#include "esphome/core/log.h"
7
8namespace esphome::audio {
9
// Tag passed to the ESP_LOGx macros in this file
static constexpr const char *TAG = "audio.decoder";

// Timeout, in milliseconds, for transferring audio data
static constexpr uint32_t READ_WRITE_TIMEOUT_MS = 20;

// Upper bound on consecutive decode iterations that consume input without producing any output (e.g., while
// skipping a large metadata block) before yielding and returning.
static constexpr uint8_t MAX_NO_OUTPUT_ITERATIONS = 32;

// Threshold that potentially_failed_count_ has to exceed before the failsafe check is taken
static constexpr uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
19
// Constructs the decoder and records input_buffer_size in input_buffer_size_, which add_source() later passes to
// RingBufferAudioSource::create().
// NOTE(review): output_buffer_size is not referenced by any statement visible in this listing and listing line 22 is
// absent; presumably that line creates output_transfer_buffer_ from it -- confirm against the original file.
20AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
21 : input_buffer_size_(input_buffer_size) {
23}
24
25esp_err_t AudioDecoder::add_source(std::weak_ptr<ring_buffer::RingBuffer> &input_ring_buffer) {
26 // Zero-copy source reading directly from the ring buffer's internal storage. Raw file data is byte
27 // aligned, so no frame alignment is required.
28 auto source = RingBufferAudioSource::create(input_ring_buffer.lock(), this->input_buffer_size_);
29 if (source == nullptr) {
30 // create() only returns nullptr for invalid arguments (expired ring buffer or zero buffer size)
31 return ESP_ERR_INVALID_ARG;
32 }
33 this->input_buffer_ = std::move(source);
34 return ESP_OK;
35}
36
37esp_err_t AudioDecoder::add_source(const uint8_t *data_pointer, size_t length) {
38 auto source = make_unique<ConstAudioSourceBuffer>();
39 source->set_data(data_pointer, length);
40 this->input_buffer_ = std::move(source);
41 return ESP_OK;
42}
43
44esp_err_t AudioDecoder::add_sink(std::weak_ptr<ring_buffer::RingBuffer> &output_ring_buffer) {
45 if (this->output_transfer_buffer_ != nullptr) {
46 this->output_transfer_buffer_->set_sink(output_ring_buffer);
47 return ESP_OK;
48 }
49 return ESP_ERR_NO_MEM;
50}
51
52#ifdef USE_SPEAKER
// Speaker variant of add_sink (only compiled when USE_SPEAKER is defined): forwards `speaker` to the output
// transfer buffer's set_sink() and returns ESP_OK, or returns ESP_ERR_NO_MEM when there is no output transfer buffer.
// NOTE(review): the signature (listing line 53) is absent from this view; the type of `speaker` cannot be confirmed
// from here.
54 if (this->output_transfer_buffer_ != nullptr) {
55 this->output_transfer_buffer_->set_sink(speaker);
56 return ESP_OK;
57 }
58 return ESP_ERR_NO_MEM;
59}
60#endif
61
// Callback variant of add_sink: forwards `callback` to the output transfer buffer's set_sink() and returns ESP_OK,
// or returns ESP_ERR_NO_MEM when there is no output transfer buffer.
// NOTE(review): the signature (listing line 62) is absent from this view; the type of `callback` cannot be
// confirmed from here.
63 if (this->output_transfer_buffer_ != nullptr) {
64 this->output_transfer_buffer_->set_sink(callback);
65 return ESP_OK;
66 }
67 return ESP_ERR_NO_MEM;
68}
69
// Sets up decoding for a file of the given type: stores the type, clears end_of_file_, and creates the matching
// decoder object inside the file-type switch.
// Returns ESP_ERR_NO_MEM when there is no output transfer buffer, ESP_ERR_NOT_SUPPORTED from the switch's default
// branch, and ESP_OK otherwise.
// NOTE(review): listing lines 77, 82, 84, 89, 91, 96, 98, 103 and 112 are absent from this view. They presumably hold
// the case labels and the left-hand sides of the dangling capacity() expressions below -- confirm against the
// original file before relying on this block.
70esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
// Without an output transfer buffer there is nowhere to place decoded audio
71 if (this->output_transfer_buffer_ == nullptr) {
72 return ESP_ERR_NO_MEM;
73 }
74
75 this->audio_file_type_ = audio_file_type;
76
78 this->end_of_file_ = false;
79
80 switch (this->audio_file_type_) {
81#ifdef USE_AUDIO_FLAC_SUPPORT
83 this->flac_decoder_ = make_unique<micro_flac::FLACDecoder>();
85 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
86 break;
87#endif
88#ifdef USE_AUDIO_MP3_SUPPORT
90 this->mp3_decoder_ = make_unique<micro_mp3::Mp3Decoder>();
92 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
93 break;
94#endif
95#ifdef USE_AUDIO_OPUS_SUPPORT
97 this->opus_decoder_ = make_unique<micro_opus::OggOpusDecoder>();
99 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
100 break;
101#endif
102#ifdef USE_AUDIO_WAV_SUPPORT
104 this->wav_decoder_ = make_unique<micro_wav::WAVDecoder>();
105 // 1 KiB suffices to always make progress while avoiding excessive CPU spinning for decoding
106 this->free_buffer_required_ = 1024;
// Only grow the buffer; a buffer that is already large enough is left alone.
// NOTE(review): the result of reallocate() is not checked here, unlike in the decode paths.
107 if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
108 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
109 }
110 break;
111#endif
113 default:
114 return ESP_ERR_NOT_SUPPORTED;
// NOTE(review): this break follows a return and can never execute
115 break;
116 }
117
118 return ESP_OK;
119}
120
// Body of the main decode routine (uses the stop_gracefully flag). Visible flow per iteration: flush decoded audio to
// the sink unless output is paused, bail out while the output buffer still holds data, refill the input source, run
// one decode step for the active file type, then update end_of_file_ / the failsafe bookkeeping from the result.
// NOTE(review): the signature, the loop header, the declaration of `state` and most return/assignment statements
// (listing lines 121, 123, 130, 135, 143, 145, 148, 151, 167, 177, 186, 197, 227-228, 232-233, 240, 242, 246, ...) are
// absent from this view, so the values returned from each branch cannot be confirmed from here.
122 if (this->input_buffer_ == nullptr) {
124 }
125
126 if (stop_gracefully) {
127 if (this->output_transfer_buffer_->available() == 0) {
128 if (this->end_of_file_) {
129 // The file decoder indicates it reached the end of file
131 }
132
133 if (!this->input_buffer_->has_buffered_data()) {
134 // If all the internal buffers are empty, the decoding is done
136 }
137 }
138 }
139
// Failsafe: entered once potentially_failed_count_ exceeds MAX_POTENTIALLY_FAILED_COUNT
140 if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
141 if (stop_gracefully) {
142 // No more new data is going to come in, so decoding is done
144 }
146 }
147
149 uint8_t no_output_iterations = 0;
150
152 // Transfer decoded out
153 if (!this->pause_output_) {
154 // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
155 size_t bytes_written =
156 this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
157
// Playback accounting: add the frames just written to the accumulator, then add the resulting milliseconds to
// playback_ms_. The accumulator is passed by pointer; presumably the helper leaves the sub-millisecond remainder
// in it -- confirm in AudioStreamInfo.
158 if (this->audio_stream_info_.has_value()) {
159 this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
160 this->playback_ms_ +=
161 this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
162 }
163
164 if ((bytes_written > 0) && (this->output_transfer_buffer_->available() == 0)) {
165 // All decoded audio has been flushed to the sink; return so the caller can react to stop/pause before
166 // decoding the next batch
168 }
169 } else {
170 // If paused, block to avoid wasting CPU resources
171 delay(READ_WRITE_TIMEOUT_MS);
172 }
173
174 if (this->output_transfer_buffer_->available() > 0) {
175 // Output transfer buffer indicates backpressure, return so caller can handle other events;
176 // e.g., stop/pause, before trying again
178 }
179
180 // Reaching here means no decoded output is pending (any would have returned above). Bounds long no-output
181 // stretches; e.g., skipping a large metadata block, so a source that keeps the ring buffer full can't spin this
182 // loop without yielding and trip the watchdog. The delay yields allowing other tasks to feed the watchdog and
183 // the return keeps stop/pause responsive.
184 if (++no_output_iterations >= MAX_NO_OUTPUT_ITERATIONS) {
185 delay(1);
187 }
188
189 // Expose the next chunk of file data. Every decoder buffers internally and consumes only what it
190 // processed, so the source does not need to accumulate or stitch chunks across fill() calls.
191 this->input_buffer_->fill(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
192
// Snapshot of the input level before decoding; compared afterwards to detect whether input was consumed
193 const size_t available_before_decode = this->input_buffer_->available();
194
195 if (available_before_decode == 0) {
196 // No data to decode, attempt to get more data next time
198 } else {
// Dispatch one decode step to the helper for the active file type; only compiled-in formats have a branch
199 switch (this->audio_file_type_) {
200#ifdef USE_AUDIO_FLAC_SUPPORT
202 state = this->decode_flac_();
203 break;
204#endif
205#ifdef USE_AUDIO_MP3_SUPPORT
207 state = this->decode_mp3_();
208 break;
209#endif
210#ifdef USE_AUDIO_OPUS_SUPPORT
212 state = this->decode_opus_();
213 break;
214#endif
215#ifdef USE_AUDIO_WAV_SUPPORT
217 state = this->decode_wav_();
218 break;
219#endif
221 default:
223 break;
224 }
225 }
226
229 } else if (state == FileDecoderState::END_OF_FILE) {
// Remember the end of file; the graceful-stop check at the top reads this flag once the output buffer is empty
230 this->end_of_file_ = true;
231 } else if (state == FileDecoderState::FAILED) {
234 // Reset the failsafe only when the iteration made forward progress: input was consumed or output was
235 // produced (output_transfer_buffer_ is drained empty above, so any available bytes are new). A
236 // MORE_TO_PROCESS that neither consumes input nor produces output means the decoder is stalled; count it
237 // toward the failsafe so a stuck stream eventually surfaces as FAILED instead of looping forever.
238 if ((this->input_buffer_->available() < available_before_decode) ||
239 (this->output_transfer_buffer_->available() > 0)) {
241 } else {
243 }
244 }
245 }
247}
248
249#ifdef USE_AUDIO_FLAC_SUPPORT
// Body of the FLAC decode step (presumably decode_flac_(); the signature on listing line 250 is absent from this
// view). Offers all currently available input bytes to the FLAC decoder, with the free tail of the output transfer
// buffer as the destination, then reacts to the decoder's result code.
// NOTE(review): the return statements (listing lines 271, 276, 279, 285, 289, 292) are absent from this view.
// NOTE(review): bytes_consumed / samples_decoded are not initialized here, unlike in the MP3 and WAV steps; they are
// only read in branches after decode() has received them as arguments.
251 size_t bytes_consumed, samples_decoded;
252
253 micro_flac::FLACDecoderResult result = this->flac_decoder_->decode(
254 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
255 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
256
257 if (result == micro_flac::FLAC_DECODER_SUCCESS) {
// Account for the newly written PCM in the output buffer; skipped until stream info is known
258 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
259 this->output_transfer_buffer_->increase_buffer_length(
260 this->audio_stream_info_.value().samples_to_bytes(samples_decoded));
261 }
262 this->input_buffer_->consume(bytes_consumed);
263 } else if (result == micro_flac::FLAC_DECODER_HEADER_READY) {
264 // Header just parsed, stream info now available
265 const auto &info = this->flac_decoder_->get_stream_info();
266 this->audio_stream_info_ = audio::AudioStreamInfo(info.bits_per_sample(), info.num_channels(), info.sample_rate());
267
268 // Reallocate the output transfer buffer to the required size
269 this->free_buffer_required_ = this->flac_decoder_->get_output_buffer_size_samples() * info.bytes_per_sample();
270 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
272 }
273 this->input_buffer_->consume(bytes_consumed);
274 } else if (result == micro_flac::FLAC_DECODER_END_OF_STREAM) {
275 this->input_buffer_->consume(bytes_consumed);
277 } else if (result == micro_flac::FLAC_DECODER_NEED_MORE_DATA) {
278 this->input_buffer_->consume(bytes_consumed);
280 } else if (result == micro_flac::FLAC_DECODER_ERROR_OUTPUT_TOO_SMALL) {
281 // Reallocate to decode the frame on the next call
// No consume() call in this branch; presumably so the same frame is offered to the decoder again
282 const auto &info = this->flac_decoder_->get_stream_info();
283 this->free_buffer_required_ = this->flac_decoder_->get_output_buffer_size_samples() * info.bytes_per_sample();
284 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
286 }
287 } else {
// Any other result code is logged as a decoder failure
288 ESP_LOGE(TAG, "FLAC decoder failed: %d", static_cast<int>(result));
290 }
291
293}
294#endif
295
296#ifdef USE_AUDIO_MP3_SUPPORT
// Body of the MP3 decode step (presumably decode_mp3_(); the signature on listing line 297 is absent from this
// view). Offers all currently available input bytes to the MP3 decoder, consumes whatever it reports as used, then
// reacts to the decoder's result code.
// NOTE(review): the return statements and the left-hand sides on listing lines 321 and 331 are absent from this
// view; the dangling size expressions below presumably assign free_buffer_required_ -- confirm against the original.
298 // microMP3's samples_decoded value is samples per channel; e.g., what ESPHome typically calls an audio frame.
299 // microMP3 uses the term frame to refer to an MP3 frame: an encoded packet that contains multiple audio frames.
300 size_t bytes_consumed = 0;
301 size_t samples_decoded = 0;
302
303 // microMP3 buffers internally: it consumes from our input buffer at its own pace, emits MP3_STREAM_INFO_READY once
304 // the first frame header is parsed, and only then produces PCM. It handles sync-word search and ID3v2 tag skipping.
305 micro_mp3::Mp3Result result = this->mp3_decoder_->decode(
306 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
307 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
308
// Consumption happens unconditionally, before the result code is inspected
309 this->input_buffer_->consume(bytes_consumed);
310
311 if (result == micro_mp3::MP3_OK) {
// samples_decoded is per channel here (see the comment at the top), hence frames_to_bytes()
312 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
313 this->output_transfer_buffer_->increase_buffer_length(
314 this->audio_stream_info_.value().frames_to_bytes(samples_decoded));
315 }
316 } else if (result == micro_mp3::MP3_STREAM_INFO_READY) {
317 // First successful header parse: capture stream info and resize the output buffer to fit one full frame.
318 // microMP3 always outputs 16-bit PCM.
319 this->audio_stream_info_ =
320 audio::AudioStreamInfo(16, this->mp3_decoder_->get_channels(), this->mp3_decoder_->get_sample_rate());
322 this->mp3_decoder_->get_samples_per_frame() * this->mp3_decoder_->get_channels() * sizeof(int16_t);
323 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
325 }
326 } else if (result == micro_mp3::MP3_NEED_MORE_DATA) {
328 } else if (result == micro_mp3::MP3_OUTPUT_BUFFER_TOO_SMALL) {
329 // Reallocate to decode the frame on the next call
330 if (this->mp3_decoder_->get_channels() > 0) {
332 this->mp3_decoder_->get_samples_per_frame() * this->mp3_decoder_->get_channels() * sizeof(int16_t);
333 } else {
334 // Fallback to worst-case size if channel info isn't available
335 this->free_buffer_required_ = this->mp3_decoder_->get_min_output_buffer_bytes();
336 }
337 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
339 }
340 } else if (result == micro_mp3::MP3_DECODE_ERROR) {
341 // Corrupt frame skipped; recoverable, retry on next call
342 ESP_LOGW(TAG, "MP3 decoder skipped a corrupt frame");
344 } else {
345 // MP3_ALLOCATION_FAILED, MP3_INPUT_INVALID, or any future error -- not recoverable
346 ESP_LOGE(TAG, "MP3 decoder failed: %d", static_cast<int>(result));
348 }
349
351}
352#endif
353
354#ifdef USE_AUDIO_OPUS_SUPPORT
// Body of the Ogg Opus decode step (decode_opus_(); the signature on listing line 355 is absent from this view).
// Offers all currently available input bytes to the Opus decoder, with the free tail of the output transfer buffer
// as the destination, then reacts to the decoder's result code.
// NOTE(review): the return statements (listing lines 382, 386, 388) are absent from this view.
// Remember whether the decoder was already initialized so the transition can be detected after this call
356 bool processed_header = this->opus_decoder_->is_initialized();
357
// NOTE(review): not initialized here, unlike in the MP3 and WAV steps; only read in the OGG_OPUS_OK branch
358 size_t bytes_consumed, samples_decoded;
359
360 micro_opus::OggOpusResult result = this->opus_decoder_->decode(
361 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
362 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
363
364 if (result == micro_opus::OGG_OPUS_OK) {
365 if (!processed_header && this->opus_decoder_->is_initialized()) {
366 // Header processed and stream info is available
367 this->audio_stream_info_ =
368 audio::AudioStreamInfo(this->opus_decoder_->get_bit_depth(), this->opus_decoder_->get_channels(),
369 this->opus_decoder_->get_sample_rate());
370 }
371 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
372 // Some audio was processed
373 this->output_transfer_buffer_->increase_buffer_length(
374 this->audio_stream_info_.value().frames_to_bytes(samples_decoded));
375 }
376 this->input_buffer_->consume(bytes_consumed);
377 } else if (result == micro_opus::OGG_OPUS_OUTPUT_BUFFER_TOO_SMALL) {
378 // Reallocate to decode the packet on the next call
// No consume() call in this branch; presumably so the same packet is offered to the decoder again
379 this->free_buffer_required_ = this->opus_decoder_->get_required_output_buffer_size();
380 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
381 // Couldn't reallocate output buffer
383 }
384 } else {
// NOTE(review): unlike the FLAC/MP3/WAV steps, `result` is passed to the log call without a cast and formatted
// with PRId8; consider static_cast<int>(result) with %d for consistency.
385 ESP_LOGE(TAG, "Opus decoder failed: %" PRId8, result);
387 }
389}
390#endif
391
392#ifdef USE_AUDIO_WAV_SUPPORT
// Body of the WAV decode step (presumably decode_wav_(); the signature on listing line 393 is absent from this
// view). Offers all currently available input bytes to the WAV decoder, consumes whatever it reports as used, then
// reacts to the decoder's result code.
// NOTE(review): the return statements (listing lines 417, 419, 422, 425) are absent from this view.
394 // microWAV's samples_decoded counts individual channel samples; e.g., for
395 // 16-bit stereo, 4 input bytes results in 2 samples_decoded.
396 size_t bytes_consumed = 0;
397 size_t samples_decoded = 0;
398
399 micro_wav::WAVDecoderResult result = this->wav_decoder_->decode(
400 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
401 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
402
// Consumption happens unconditionally, before the result code is inspected
403 this->input_buffer_->consume(bytes_consumed);
404
405 if (result == micro_wav::WAV_DECODER_SUCCESS) {
// samples_decoded counts individual channel samples (see the comment at the top), hence samples_to_bytes()
406 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
407 this->output_transfer_buffer_->increase_buffer_length(
408 this->audio_stream_info_.value().samples_to_bytes(samples_decoded));
409 }
410 } else if (result == micro_wav::WAV_DECODER_HEADER_READY) {
411 // After HEADER_READY, get_bits_per_sample() returns the output bit depth
412 // (16 for A-law/mu-law, 32 for IEEE float, original value for PCM).
413 this->audio_stream_info_ =
414 audio::AudioStreamInfo(this->wav_decoder_->get_bits_per_sample(), this->wav_decoder_->get_channels(),
415 this->wav_decoder_->get_sample_rate());
416 } else if (result == micro_wav::WAV_DECODER_NEED_MORE_DATA) {
418 } else if (result == micro_wav::WAV_DECODER_END_OF_STREAM) {
420 } else {
// Any other result code is logged as a decoder failure
421 ESP_LOGE(TAG, "WAV decoder failed: %d", static_cast<int>(result));
423 }
424
426}
427#endif
428
429} // namespace esphome::audio
430
431#endif
optional< AudioStreamInfo > audio_stream_info_
esp_err_t add_source(std::weak_ptr< ring_buffer::RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
FileDecoderState decode_opus_()
std::unique_ptr< AudioReadableBuffer > input_buffer_
std::unique_ptr< micro_flac::FLACDecoder > flac_decoder_
std::unique_ptr< micro_wav::WAVDecoder > wav_decoder_
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
esp_err_t add_sink(std::weak_ptr< ring_buffer::RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
FileDecoderState decode_flac_()
std::unique_ptr< micro_opus::OggOpusDecoder > opus_decoder_
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
std::unique_ptr< micro_mp3::Mp3Decoder > mp3_decoder_
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the output transfer buffer and stores the input buffer size for later use by add_source()
Abstract interface for writing decoded audio data to a sink.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
static std::unique_ptr< RingBufferAudioSource > create(std::shared_ptr< ring_buffer::RingBuffer > ring_buffer, size_t max_fill_bytes, uint8_t alignment_bytes=1)
Creates a new ring-buffer-backed audio source after validating its parameters.
bool state
Definition fan.h:2
void HOT delay(uint32_t ms)
Definition hal.cpp:85
static void uint32_t
uint16_t length
Definition tt21100.cpp:0