ESPHome 2026.3.0-dev
Loading...
Searching...
No Matches
audio_decoder.cpp
Go to the documentation of this file.
1#include "audio_decoder.h"
2
3#ifdef USE_ESP32
4
5#include "esphome/core/hal.h"
6#include "esphome/core/log.h"
7
8namespace esphome {
9namespace audio {
10
11static const char *const TAG = "audio.decoder";
12
13static const uint32_t DECODING_TIMEOUT_MS = 50; // The decode function will yield after this duration
14static const uint32_t READ_WRITE_TIMEOUT_MS = 20; // Timeout for transferring audio data
15
16static const uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
17
// Constructs the decoder and stores input_buffer_size in input_buffer_size_.
// NOTE(review): listing line 20 is absent from this extract and output_buffer_size is unused in the visible lines;
// presumably the missing statement allocates the output transfer buffer - confirm against the original file.
18AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
19 : input_buffer_size_(input_buffer_size) {
21}
22
24#ifdef USE_AUDIO_MP3_SUPPORT
26 esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
27 }
28#endif
29}
30
// Installs a ring buffer as the source of the encoded file data.
// Returns ESP_ERR_NO_MEM when `source` is null; otherwise `source` is given the ring buffer, moved into
// input_buffer_ (replacing whatever was there), and ESP_OK is returned.
// NOTE(review): the statement that creates `source` (listing line 32) is missing from this extract - restore it
// from the original file before building.
31esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
33 if (source == nullptr) {
34 return ESP_ERR_NO_MEM;
35 }
36 source->set_source(input_ring_buffer);
37 this->input_buffer_ = std::move(source);
38 return ESP_OK;
39}
40
41esp_err_t AudioDecoder::add_source(const uint8_t *data_pointer, size_t length) {
42 auto source = make_unique<ConstAudioSourceBuffer>();
43 source->set_data(data_pointer, length);
44 this->input_buffer_ = std::move(source);
45 return ESP_OK;
46}
47
48esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
49 if (this->output_transfer_buffer_ != nullptr) {
50 this->output_transfer_buffer_->set_sink(output_ring_buffer);
51 return ESP_OK;
52 }
53 return ESP_ERR_NO_MEM;
54}
55
56#ifdef USE_SPEAKER
58 if (this->output_transfer_buffer_ != nullptr) {
59 this->output_transfer_buffer_->set_sink(speaker);
60 return ESP_OK;
61 }
62 return ESP_ERR_NO_MEM;
63}
64#endif
65
67 if (this->output_transfer_buffer_ != nullptr) {
68 this->output_transfer_buffer_->set_sink(callback);
69 return ESP_OK;
70 }
71 return ESP_ERR_NO_MEM;
72}
73
// Prepares the decoder for a new file of the given type.
// Returns ESP_ERR_NO_MEM when there is no output transfer buffer, ESP_ERR_NOT_SUPPORTED from the default branch of
// the switch, and ESP_OK after one of the per-format setup branches has run.
// NOTE(review): this extract drops listing lines 81, 86, 92, 97, 108, 110, 115 and 126. The `case` labels are not
// visible, and the two bare `capacity();` statements are presumably the tail of an assignment whose left-hand side
// was on a dropped line - confirm against the original file.
// NOTE(review): the result of reallocate() is tested elsewhere in this file (FLAC header handling, Opus
// buffer-too-small handling) but is ignored in the MP3 and WAV branches here; consider handling a failure.
74esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
75 if (this->output_transfer_buffer_ == nullptr) {
76 return ESP_ERR_NO_MEM;
77 }
78
79 this->audio_file_type_ = audio_file_type;
80
82 this->end_of_file_ = false;
83
84 switch (this->audio_file_type_) {
85#ifdef USE_AUDIO_FLAC_SUPPORT
87 this->flac_decoder_ = make_unique<esp_audio_libs::flac::FLACDecoder>();
88 // CRC check slows down decoding by 15-20% on an ESP32-S3. FLAC sources in ESPHome are either from an http source
89 // or built into the firmware, so the data integrity is already verified by the time it gets to the decoder,
90 // making the CRC check unnecessary.
91 this->flac_decoder_->set_crc_check_enabled(false);
93 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
94 break;
95#endif
96#ifdef USE_AUDIO_MP3_SUPPORT
98 this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
99
100 // MP3 always has 1152 samples per chunk
101 this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
102
103 // Always reallocate the output transfer buffer to the smallest necessary size
104 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
105 break;
106#endif
107#ifdef USE_AUDIO_OPUS_SUPPORT
109 this->opus_decoder_ = make_unique<micro_opus::OggOpusDecoder>();
111 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
112 this->decoder_buffers_internally_ = true;
113 break;
114#endif
116 this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
117 this->wav_decoder_->reset();
118
119 // Processing WAVs doesn't actually require a specific amount of buffer size, as it is already in PCM format.
120 // Thus, we don't reallocate to a minimum size.
121 this->free_buffer_required_ = 1024;
122 if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
123 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
124 }
125 break;
127 default:
128 return ESP_ERR_NOT_SUPPORTED;
129 break;
130 }
131
132 return ESP_OK;
133}
134
136 if (this->input_buffer_ == nullptr) {
138 }
139
140 if (stop_gracefully) {
141 if (this->output_transfer_buffer_->available() == 0) {
142 if (this->end_of_file_) {
143 // The file decoder indicates it reached the end of file
145 }
146
147 if (!this->input_buffer_->has_buffered_data()) {
148 // If all the internal buffers are empty, the decoding is done
150 }
151 }
152 }
153
154 if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
155 if (stop_gracefully) {
156 // No more new data is going to come in, so decoding is done
158 }
160 }
161
163
164 uint32_t decoding_start = millis();
165
166 bool first_loop_iteration = true;
167
168 size_t bytes_processed = 0;
169 size_t bytes_available_before_processing = 0;
170
172 // Transfer decoded out
173 if (!this->pause_output_) {
174 // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
175 size_t bytes_written =
176 this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
177
178 if (this->audio_stream_info_.has_value()) {
179 this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
180 this->playback_ms_ +=
181 this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
182 }
183 } else {
184 // If paused, block to avoid wasting CPU resources
185 delay(READ_WRITE_TIMEOUT_MS);
186 }
187
188 // Verify there is enough space to store more decoded audio and that the function hasn't been running too long
189 if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
190 (millis() - decoding_start > DECODING_TIMEOUT_MS)) {
192 }
193
194 // Decode more audio
195
196 // Only shift data on the first loop iteration to avoid unnecessary, slow moves
197 // If the decoder buffers internally, then never shift
198 size_t bytes_read = this->input_buffer_->fill(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS),
199 first_loop_iteration && !this->decoder_buffers_internally_);
200
201 if (!first_loop_iteration && (this->input_buffer_->available() < bytes_processed)) {
202 // Less data is available than what was processed in last iteration, so don't attempt to decode.
203 // This attempts to prevent the decoder from repeatedly trying to decode an incomplete frame. The transfer buffer
204 // will shift the remaining data to the start and copy more from the source the next time the decode function is
205 // called
206 break;
207 }
208
209 bytes_available_before_processing = this->input_buffer_->available();
210
211 if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
212 // Failed to decode in last attempt and there is no new data
213
214 if ((this->input_buffer_->free() == 0) && first_loop_iteration) {
215 // The input buffer is full (or read-only, e.g. const flash source). Since it previously failed on the exact
216 // same data, we can never recover. For const sources this is correct: the entire file is already available, so
217 // a decode failure is genuine, not a transient out-of-data condition.
219 } else {
220 // Attempt to get more data next time
222 }
223 } else if (this->input_buffer_->available() == 0) {
224 // No data to decode, attempt to get more data next time
226 } else {
227 switch (this->audio_file_type_) {
228#ifdef USE_AUDIO_FLAC_SUPPORT
230 state = this->decode_flac_();
231 break;
232#endif
233#ifdef USE_AUDIO_MP3_SUPPORT
235 state = this->decode_mp3_();
236 break;
237#endif
238#ifdef USE_AUDIO_OPUS_SUPPORT
240 state = this->decode_opus_();
241 break;
242#endif
244 state = this->decode_wav_();
245 break;
247 default:
249 break;
250 }
251 }
252
253 first_loop_iteration = false;
254 bytes_processed = bytes_available_before_processing - this->input_buffer_->available();
255
258 } else if (state == FileDecoderState::END_OF_FILE) {
259 this->end_of_file_ = true;
260 } else if (state == FileDecoderState::FAILED) {
264 }
265 }
267}
268
269#ifdef USE_AUDIO_FLAC_SUPPORT
271 if (!this->audio_stream_info_.has_value()) {
272 // Header hasn't been read
273 auto result = this->flac_decoder_->read_header(this->input_buffer_->data(), this->input_buffer_->available());
274
275 if (result > esp_audio_libs::flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
276 // Serious error reading FLAC header, there is no recovery
278 }
279
280 size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
281 this->input_buffer_->consume(bytes_consumed);
282
283 if (result == esp_audio_libs::flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
285 }
286
287 // Reallocate the output transfer buffer to the smallest necessary size
288 this->free_buffer_required_ = flac_decoder_->get_output_buffer_size_bytes();
289 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
290 // Couldn't reallocate output buffer
292 }
293
294 this->audio_stream_info_ =
295 audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
296 this->flac_decoder_->get_sample_rate());
297
299 }
300
301 uint32_t output_samples = 0;
302 auto result = this->flac_decoder_->decode_frame(this->input_buffer_->data(), this->input_buffer_->available(),
303 this->output_transfer_buffer_->get_buffer_end(), &output_samples);
304
305 if (result == esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
306 // Not an issue, just needs more data that we'll get next time.
308 }
309
310 size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
311 this->input_buffer_->consume(bytes_consumed);
312
313 if (result > esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
314 // Corrupted frame, don't retry with current buffer content, wait for new sync
316 }
317
318 // We have successfully decoded some input data and have new output data
319 this->output_transfer_buffer_->increase_buffer_length(
320 this->audio_stream_info_.value().samples_to_bytes(output_samples));
321
322 if (result == esp_audio_libs::flac::FLAC_DECODER_NO_MORE_FRAMES) {
324 }
325
327}
328#endif
329
330#ifdef USE_AUDIO_MP3_SUPPORT
332 // Look for the next sync word
333 int buffer_length = (int) this->input_buffer_->available();
334 int32_t offset = esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_buffer_->data(), buffer_length);
335
336 if (offset < 0) {
337 // New data may have the sync word
338 this->input_buffer_->consume(buffer_length);
340 }
341
342 // Advance read pointer to match the offset for the syncword
343 this->input_buffer_->consume(offset);
344 const uint8_t *buffer_start = this->input_buffer_->data();
345
346 buffer_length = (int) this->input_buffer_->available();
347 int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
348 (int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
349
350 size_t consumed = this->input_buffer_->available() - buffer_length;
351 this->input_buffer_->consume(consumed);
352
353 if (err) {
354 switch (err) {
355 case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
356 [[fallthrough]];
357 case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
359 break;
360 default:
361 // Most errors are recoverable by moving on to the next frame, so mark as potentially failed
363 break;
364 }
365 } else {
366 esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
367 esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
368 if (mp3_frame_info.outputSamps > 0) {
369 int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
370 this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
371
372 if (!this->audio_stream_info_.has_value()) {
373 this->audio_stream_info_ =
374 audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
375 }
376 }
377 }
378
380}
381#endif
382
383#ifdef USE_AUDIO_OPUS_SUPPORT
385 bool processed_header = this->opus_decoder_->is_initialized();
386
387 size_t bytes_consumed, samples_decoded;
388
389 micro_opus::OggOpusResult result = this->opus_decoder_->decode(
390 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
391 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
392
393 if (result == micro_opus::OGG_OPUS_OK) {
394 if (!processed_header && this->opus_decoder_->is_initialized()) {
395 // Header processed and stream info is available
396 this->audio_stream_info_ =
397 audio::AudioStreamInfo(this->opus_decoder_->get_bit_depth(), this->opus_decoder_->get_channels(),
398 this->opus_decoder_->get_sample_rate());
399 }
400 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
401 // Some audio was processed
402 this->output_transfer_buffer_->increase_buffer_length(
403 this->audio_stream_info_.value().frames_to_bytes(samples_decoded));
404 }
405 this->input_buffer_->consume(bytes_consumed);
406 } else if (result == micro_opus::OGG_OPUS_OUTPUT_BUFFER_TOO_SMALL) {
407 // Reallocate to decode the packet on the next call
408 this->free_buffer_required_ = this->opus_decoder_->get_required_output_buffer_size();
409 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
410 // Couldn't reallocate output buffer
412 }
413 } else {
414 ESP_LOGE(TAG, "Opus decoder failed: %" PRId8, result);
416 }
418}
419#endif
420
422 if (!this->audio_stream_info_.has_value()) {
423 // Header hasn't been processed
424
425 esp_audio_libs::wav_decoder::WAVDecoderResult result =
426 this->wav_decoder_->decode_header(this->input_buffer_->data(), this->input_buffer_->available());
427
428 if (result == esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
429 this->input_buffer_->consume(this->wav_decoder_->bytes_processed());
430
432 this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());
433
434 this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
435 this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
437 } else if (result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
438 // Available data didn't have the full header
440 } else {
442 }
443 } else {
444 if (!this->wav_has_known_end_ || (this->wav_bytes_left_ > 0)) {
445 size_t bytes_to_copy = this->input_buffer_->available();
446
447 if (this->wav_has_known_end_) {
448 bytes_to_copy = std::min(bytes_to_copy, this->wav_bytes_left_);
449 }
450
451 bytes_to_copy = std::min(bytes_to_copy, this->output_transfer_buffer_->free());
452
453 if (bytes_to_copy > 0) {
454 std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_buffer_->data(), bytes_to_copy);
455 this->input_buffer_->consume(bytes_to_copy);
456 this->output_transfer_buffer_->increase_buffer_length(bytes_to_copy);
457 if (this->wav_has_known_end_) {
458 this->wav_bytes_left_ -= bytes_to_copy;
459 }
460 }
462 }
463 }
464
466}
467
468} // namespace audio
469} // namespace esphome
470
471#endif
optional< AudioStreamInfo > audio_stream_info_
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
FileDecoderState decode_opus_()
std::unique_ptr< AudioReadableBuffer > input_buffer_
std::unique_ptr< esp_audio_libs::flac::FLACDecoder > flac_decoder_
~AudioDecoder()
Deallocates the MP3 decoder (the flac, opus, and wav decoders are deallocated automatically)
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
FileDecoderState decode_flac_()
std::unique_ptr< micro_opus::OggOpusDecoder > opus_decoder_
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the output transfer buffer and stores the input buffer size for later use by add_source()
Abstract interface for writing decoded audio data to a sink.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
bool state
Definition fan.h:2
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
void HOT delay(uint32_t ms)
Definition core.cpp:27
uint32_t IRAM_ATTR HOT millis()
Definition core.cpp:25
uint16_t length
Definition tt21100.cpp:0