ESPHome 2026.5.0-dev
Loading...
Searching...
No Matches
microphone_source.cpp
Go to the documentation of this file.
1#include "microphone_source.h"
2
3namespace esphome {
4namespace microphone {
5
// Largest value representable in signed Q25 fixed point; upper bound used when clamping samples after gain.
static constexpr int32_t Q25_MAX_VALUE = (1 << 25) - 1;
// Smallest value representable in signed Q25 fixed point: the bitwise complement of the maximum, i.e. -(1 << 25).
static constexpr int32_t Q25_MIN_VALUE = ~Q25_MAX_VALUE;
8
10 return audio::AudioStreamInfo(this->bits_per_sample_, this->channels_.count(),
11 this->mic_->get_audio_stream_info().get_sample_rate());
12}
13
15 if (!this->enabled_ && !this->passive_) {
16 this->enabled_ = true;
17 this->mic_->start();
18 }
19}
20
22 if (this->enabled_ && !this->passive_) {
23 this->enabled_ = false;
24 this->mic_->stop();
25 this->processed_samples_.reset();
26 }
27}
28
29void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vector<uint8_t> &filtered_data) {
30 // - Bit depth conversions are obtained by truncating bits or padding with zeros - no dithering is applied.
31 // - In the comments, Qxx refers to a fixed point number with xx bits of precision for representing fractional values.
32 // For example, audio with a bit depth of 16 can store a sample in a int16, which can be considered a Q15 number.
33 // - All samples are converted to Q25 before applying the gain factor - this results in a small precision loss for
34 // data with 32 bits per sample. Since the maximum gain factor is 64 = (1<<6), this ensures that applying the gain
35 // will never overflow a 32 bit signed integer. This still retains more bit depth than what is audibly noticeable.
36 // - Loops for reading/writing data buffers are unrolled, assuming little endian, for a small performance increase.
37
38 const size_t source_bytes_per_sample = this->mic_->get_audio_stream_info().samples_to_bytes(1);
39 const uint32_t source_channels = this->mic_->get_audio_stream_info().get_channels();
40
41 const size_t source_bytes_per_frame = this->mic_->get_audio_stream_info().frames_to_bytes(1);
42
43 const uint32_t total_frames = this->mic_->get_audio_stream_info().bytes_to_frames(data.size());
44 const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8;
45 const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count();
46
47 filtered_data.resize(target_bytes_per_frame * total_frames);
48
49 uint8_t *current_data = filtered_data.data();
50
51 for (uint32_t frame_index = 0; frame_index < total_frames; ++frame_index) {
52 for (uint32_t channel_index = 0; channel_index < source_channels; ++channel_index) {
53 if (this->channels_.test(channel_index)) {
54 // Channel's current sample is included in the target mask. Convert bits per sample, if necessary.
55
56 const uint32_t sample_index = frame_index * source_bytes_per_frame + channel_index * source_bytes_per_sample;
57
58 int32_t sample = audio::unpack_audio_sample_to_q31(&data[sample_index], source_bytes_per_sample); // Q31
59 sample >>= 6; // Q31 -> Q25
60
61 // Apply gain using multiplication
62 sample *= this->gain_factor_; // Q25
63
64 // Clamp ``sample`` in case gain multiplication overflows 25 bits
65 sample = clamp<int32_t>(sample, Q25_MIN_VALUE, Q25_MAX_VALUE); // Q25
66
67 sample *= (1 << 6); // Q25 -> Q31
68
69 audio::pack_q31_as_audio_sample(sample, current_data, target_bytes_per_sample);
70 current_data = current_data + target_bytes_per_sample;
71 }
72 }
73 }
74}
75
76} // namespace microphone
77} // namespace esphome
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition audio.h:53
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition audio.h:58
uint32_t bytes_to_frames(size_t bytes) const
Converts bytes to frames.
Definition audio.h:43
uint8_t get_channels() const
Definition audio.h:29
audio::AudioStreamInfo get_audio_stream_info()
Definition microphone.h:40
std::shared_ptr< std::vector< uint8_t > > processed_samples_
audio::AudioStreamInfo get_audio_stream_info()
Gets the AudioStreamInfo of the data after processing.
void process_audio_(const std::vector< uint8_t > &data, std::vector< uint8_t > &filtered_data)
int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample)
Unpacks a quantized audio sample into a Q31 fixed-point number.
Definition audio.h:152
void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample)
Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
Definition audio.h:178
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
static void uint32_t