ESPHome 2026.5.0-dev
Loading...
Searching...
No Matches
audio_decoder.cpp
Go to the documentation of this file.
1#include "audio_decoder.h"
2
3#ifdef USE_ESP32
4
5#include "esphome/core/hal.h"
6#include "esphome/core/log.h"
7
8namespace esphome {
9namespace audio {
10
11static const char *const TAG = "audio.decoder";
12
13static const uint32_t DECODING_TIMEOUT_MS = 50; // The decode function will yield after this duration
14static const uint32_t READ_WRITE_TIMEOUT_MS = 20; // Timeout for transferring audio data
15
16static const uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
17
18AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
19 : input_buffer_size_(input_buffer_size) {
21}
22
24#ifdef USE_AUDIO_MP3_SUPPORT
26 esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
27 }
28#endif
29}
30
31esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
33 if (source == nullptr) {
34 return ESP_ERR_NO_MEM;
35 }
36 source->set_source(input_ring_buffer);
37 this->input_buffer_ = std::move(source);
38 return ESP_OK;
39}
40
// Installs a read-only input source over a caller-supplied range of raw file data.
//
// A new ConstAudioSourceBuffer is created, given the pointer/length pair via set_data(), and stored as
// input_buffer_, replacing whichever source was installed before.
//
// @param data_pointer Start of the raw (still encoded) file data.
// @param length Number of bytes available at data_pointer.
// @return Always ESP_OK; this overload has no failure path.
//
// NOTE(review): presumably the buffer only references the data instead of copying it, so the memory would have to
// stay valid while decoding - confirm against ConstAudioSourceBuffer::set_data().
41esp_err_t AudioDecoder::add_source(const uint8_t *data_pointer, size_t length) {
42 auto source = make_unique<ConstAudioSourceBuffer>();
43 source->set_data(data_pointer, length);
44 this->input_buffer_ = std::move(source);  // Ownership moves to the decoder; any previous source is released
45 return ESP_OK;
46}
47
// Routes decoded audio to a ring buffer sink.
//
// The ring buffer is forwarded to the output transfer buffer's set_sink(); nothing else is changed.
//
// @param output_ring_buffer Ring buffer that should receive the decoded audio.
// @return ESP_OK if the sink was set, ESP_ERR_NO_MEM if output_transfer_buffer_ is null.
//
// NOTE(review): a null output_transfer_buffer_ presumably means its allocation failed in the constructor - confirm.
48esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
49 if (this->output_transfer_buffer_ != nullptr) {
50 this->output_transfer_buffer_->set_sink(output_ring_buffer);
51 return ESP_OK;
52 }
53 return ESP_ERR_NO_MEM;  // No transfer buffer exists to attach the sink to
54}
55
56#ifdef USE_SPEAKER
58 if (this->output_transfer_buffer_ != nullptr) {
59 this->output_transfer_buffer_->set_sink(speaker);
60 return ESP_OK;
61 }
62 return ESP_ERR_NO_MEM;
63}
64#endif
65
67 if (this->output_transfer_buffer_ != nullptr) {
68 this->output_transfer_buffer_->set_sink(callback);
69 return ESP_OK;
70 }
71 return ESP_ERR_NO_MEM;
72}
73
74esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
75 if (this->output_transfer_buffer_ == nullptr) {
76 return ESP_ERR_NO_MEM;
77 }
78
79 this->audio_file_type_ = audio_file_type;
80
82 this->end_of_file_ = false;
83
84 switch (this->audio_file_type_) {
85#ifdef USE_AUDIO_FLAC_SUPPORT
87 this->flac_decoder_ = make_unique<micro_flac::FLACDecoder>();
89 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
90 this->decoder_buffers_internally_ = true;
91 break;
92#endif
93#ifdef USE_AUDIO_MP3_SUPPORT
95 this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
96
97 // MP3 always has 1152 samples per chunk
98 this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
99
100 // Always reallocate the output transfer buffer to the smallest necessary size
101 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
102 break;
103#endif
104#ifdef USE_AUDIO_OPUS_SUPPORT
106 this->opus_decoder_ = make_unique<micro_opus::OggOpusDecoder>();
108 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
109 this->decoder_buffers_internally_ = true;
110 break;
111#endif
113 this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
114 this->wav_decoder_->reset();
115
116 // Processing WAVs doesn't actually require a specific buffer size, as the audio is already in PCM format.
117 // Thus, we don't reallocate to a minimum size.
118 this->free_buffer_required_ = 1024;
119 if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
120 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
121 }
122 break;
124 default:
125 return ESP_ERR_NOT_SUPPORTED;
126 break;
127 }
128
129 return ESP_OK;
130}
131
133 if (this->input_buffer_ == nullptr) {
135 }
136
137 if (stop_gracefully) {
138 if (this->output_transfer_buffer_->available() == 0) {
139 if (this->end_of_file_) {
140 // The file decoder indicates it reached the end of file
142 }
143
144 if (!this->input_buffer_->has_buffered_data()) {
145 // If all the internal buffers are empty, the decoding is done
147 }
148 }
149 }
150
151 if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
152 if (stop_gracefully) {
153 // No more new data is going to come in, so decoding is done
155 }
157 }
158
160
161 uint32_t decoding_start = millis();
162
163 bool first_loop_iteration = true;
164
165 size_t bytes_processed = 0;
166 size_t bytes_available_before_processing = 0;
167
169 // Transfer decoded audio out to the sink
170 if (!this->pause_output_) {
171 // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
172 size_t bytes_written =
173 this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
174
175 if (this->audio_stream_info_.has_value()) {
176 this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
177 this->playback_ms_ +=
178 this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
179 }
180 } else {
181 // If paused, block to avoid wasting CPU resources
182 delay(READ_WRITE_TIMEOUT_MS);
183 }
184
185 // Verify there is enough space to store more decoded audio and that the function hasn't been running too long
186 if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
187 (millis() - decoding_start > DECODING_TIMEOUT_MS)) {
189 }
190
191 // Decode more audio
192
193 // Only shift data on the first loop iteration to avoid unnecessary, slow moves
194 // If the decoder buffers internally, then never shift
195 size_t bytes_read = this->input_buffer_->fill(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS),
196 first_loop_iteration && !this->decoder_buffers_internally_);
197
198 if (!first_loop_iteration && (this->input_buffer_->available() < bytes_processed)) {
199 // Less data is available than what was processed in last iteration, so don't attempt to decode.
200 // This attempts to prevent the decoder from consistently trying to decode an incomplete frame. The transfer buffer
201 // will shift the remaining data to the start and copy more from the source the next time the decode function is
202 // called
203 break;
204 }
205
206 bytes_available_before_processing = this->input_buffer_->available();
207
208 if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
209 // Failed to decode in last attempt and there is no new data
210
211 if ((this->input_buffer_->free() == 0) && first_loop_iteration) {
212 // The input buffer is full (or read-only, e.g. const flash source). Since it previously failed on the exact
213 // same data, we can never recover. For const sources this is correct: the entire file is already available, so
214 // a decode failure is genuine, not a transient out-of-data condition.
216 } else {
217 // Attempt to get more data next time
219 }
220 } else if (this->input_buffer_->available() == 0) {
221 // No data to decode, attempt to get more data next time
223 } else {
224 switch (this->audio_file_type_) {
225#ifdef USE_AUDIO_FLAC_SUPPORT
227 state = this->decode_flac_();
228 break;
229#endif
230#ifdef USE_AUDIO_MP3_SUPPORT
232 state = this->decode_mp3_();
233 break;
234#endif
235#ifdef USE_AUDIO_OPUS_SUPPORT
237 state = this->decode_opus_();
238 break;
239#endif
241 state = this->decode_wav_();
242 break;
244 default:
246 break;
247 }
248 }
249
250 first_loop_iteration = false;
251 bytes_processed = bytes_available_before_processing - this->input_buffer_->available();
252
255 } else if (state == FileDecoderState::END_OF_FILE) {
256 this->end_of_file_ = true;
257 } else if (state == FileDecoderState::FAILED) {
261 }
262 }
264}
265
266#ifdef USE_AUDIO_FLAC_SUPPORT
268 size_t bytes_consumed, samples_decoded;
269
270 micro_flac::FLACDecoderResult result = this->flac_decoder_->decode(
271 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
272 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
273
274 if (result == micro_flac::FLAC_DECODER_SUCCESS) {
275 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
276 this->output_transfer_buffer_->increase_buffer_length(
277 this->audio_stream_info_.value().samples_to_bytes(samples_decoded));
278 }
279 this->input_buffer_->consume(bytes_consumed);
280 } else if (result == micro_flac::FLAC_DECODER_HEADER_READY) {
281 // Header just parsed, stream info now available
282 const auto &info = this->flac_decoder_->get_stream_info();
283 this->audio_stream_info_ = audio::AudioStreamInfo(info.bits_per_sample(), info.num_channels(), info.sample_rate());
284
285 // Reallocate the output transfer buffer to the required size
286 this->free_buffer_required_ = this->flac_decoder_->get_output_buffer_size_samples() * info.bytes_per_sample();
287 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
289 }
290 this->input_buffer_->consume(bytes_consumed);
291 } else if (result == micro_flac::FLAC_DECODER_END_OF_STREAM) {
292 this->input_buffer_->consume(bytes_consumed);
294 } else if (result == micro_flac::FLAC_DECODER_NEED_MORE_DATA) {
295 this->input_buffer_->consume(bytes_consumed);
297 } else if (result == micro_flac::FLAC_DECODER_ERROR_OUTPUT_TOO_SMALL) {
298 // Reallocate to decode the frame on the next call
299 const auto &info = this->flac_decoder_->get_stream_info();
300 this->free_buffer_required_ = this->flac_decoder_->get_output_buffer_size_samples() * info.bytes_per_sample();
301 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
303 }
304 } else {
305 ESP_LOGE(TAG, "FLAC decoder failed: %d", static_cast<int>(result));
307 }
308
310}
311#endif
312
313#ifdef USE_AUDIO_MP3_SUPPORT
315 // Look for the next sync word
316 int buffer_length = (int) this->input_buffer_->available();
317 int32_t offset = esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_buffer_->data(), buffer_length);
318
319 if (offset < 0) {
320 // New data may have the sync word
321 this->input_buffer_->consume(buffer_length);
323 }
324
325 // Advance read pointer to match the offset for the syncword
326 this->input_buffer_->consume(offset);
327 const uint8_t *buffer_start = this->input_buffer_->data();
328
329 buffer_length = (int) this->input_buffer_->available();
330 int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
331 (int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
332
333 size_t consumed = this->input_buffer_->available() - buffer_length;
334 this->input_buffer_->consume(consumed);
335
336 if (err) {
337 switch (err) {
338 case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
339 [[fallthrough]];
340 case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
342 break;
343 default:
344 // Most errors are recoverable by moving on to the next frame, so mark as potentially failed
346 break;
347 }
348 } else {
349 esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
350 esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
351 if (mp3_frame_info.outputSamps > 0) {
352 int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
353 this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
354
355 if (!this->audio_stream_info_.has_value()) {
356 this->audio_stream_info_ =
357 audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
358 }
359 }
360 }
361
363}
364#endif
365
366#ifdef USE_AUDIO_OPUS_SUPPORT
368 bool processed_header = this->opus_decoder_->is_initialized();
369
370 size_t bytes_consumed, samples_decoded;
371
372 micro_opus::OggOpusResult result = this->opus_decoder_->decode(
373 this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
374 this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
375
376 if (result == micro_opus::OGG_OPUS_OK) {
377 if (!processed_header && this->opus_decoder_->is_initialized()) {
378 // Header processed and stream info is available
379 this->audio_stream_info_ =
380 audio::AudioStreamInfo(this->opus_decoder_->get_bit_depth(), this->opus_decoder_->get_channels(),
381 this->opus_decoder_->get_sample_rate());
382 }
383 if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
384 // Some audio was processed
385 this->output_transfer_buffer_->increase_buffer_length(
386 this->audio_stream_info_.value().frames_to_bytes(samples_decoded));
387 }
388 this->input_buffer_->consume(bytes_consumed);
389 } else if (result == micro_opus::OGG_OPUS_OUTPUT_BUFFER_TOO_SMALL) {
390 // Reallocate to decode the packet on the next call
391 this->free_buffer_required_ = this->opus_decoder_->get_required_output_buffer_size();
392 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
393 // Couldn't reallocate output buffer
395 }
396 } else {
397 ESP_LOGE(TAG, "Opus decoder failed: %" PRId8, result);
399 }
401}
402#endif
403
405 if (!this->audio_stream_info_.has_value()) {
406 // Header hasn't been processed
407
408 esp_audio_libs::wav_decoder::WAVDecoderResult result =
409 this->wav_decoder_->decode_header(this->input_buffer_->data(), this->input_buffer_->available());
410
411 if (result == esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
412 this->input_buffer_->consume(this->wav_decoder_->bytes_processed());
413
415 this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());
416
417 this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
418 this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
420 } else if (result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
421 // Available data didn't have the full header
423 } else {
425 }
426 } else {
427 if (!this->wav_has_known_end_ || (this->wav_bytes_left_ > 0)) {
428 size_t bytes_to_copy = this->input_buffer_->available();
429
430 if (this->wav_has_known_end_) {
431 bytes_to_copy = std::min(bytes_to_copy, this->wav_bytes_left_);
432 }
433
434 bytes_to_copy = std::min(bytes_to_copy, this->output_transfer_buffer_->free());
435
436 if (bytes_to_copy > 0) {
437 std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_buffer_->data(), bytes_to_copy);
438 this->input_buffer_->consume(bytes_to_copy);
439 this->output_transfer_buffer_->increase_buffer_length(bytes_to_copy);
440 if (this->wav_has_known_end_) {
441 this->wav_bytes_left_ -= bytes_to_copy;
442 }
443 }
445 }
446 }
447
449}
450
451} // namespace audio
452} // namespace esphome
453
454#endif
media_source::MediaSource * source
optional< AudioStreamInfo > audio_stream_info_
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
FileDecoderState decode_opus_()
std::unique_ptr< AudioReadableBuffer > input_buffer_
std::unique_ptr< micro_flac::FLACDecoder > flac_decoder_
~AudioDecoder()
Deallocates the MP3 decoder (the FLAC, Opus, and WAV decoders are deallocated automatically).
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
FileDecoderState decode_flac_()
std::unique_ptr< micro_opus::OggOpusDecoder > opus_decoder_
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the output transfer buffer and stores the input buffer size for later use by add_source()
Abstract interface for writing decoded audio data to a sink.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
bool state
Definition fan.h:2
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
void HOT delay(uint32_t ms)
Definition core.cpp:28
uint32_t IRAM_ATTR HOT millis()
Definition core.cpp:26
static void uint32_t
uint16_t length
Definition tt21100.cpp:0