ESPHome 2026.3.0
Loading...
Searching...
No Matches
audio.h
Go to the documentation of this file.
1#pragma once
2
4
5#include <cstddef>
6#include <cstdint>
7
8namespace esphome {
9namespace audio {
10
12 /* Class to represent important parameters of the audio stream that also provides helper functions to convert between
13 * various audio related units.
14 *
15 * - An audio sample represents a unit of audio for one channel.
16 * - A frame represents a unit of audio with a sample for every channel.
17 *
18 * In general, converting between bytes, samples, and frames shouldn't result in rounding errors so long as frames
19 * are used as the main unit when transferring audio data. Durations may result in rounding for certain sample rates;
20 * e.g., 44.1 kHz. The ``frames_to_milliseconds_with_remainder`` function should be used for accuracy, as it takes
21 * into account the remainder rather than just ignoring any rounding.
22 */
23 public:
25 : AudioStreamInfo(16, 1, 16000){}; // Default values represent ESPHome's audio components historical values
26 AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate);
27
28 uint8_t get_bits_per_sample() const { return this->bits_per_sample_; }
29 uint8_t get_channels() const { return this->channels_; }
30 uint32_t get_sample_rate() const { return this->sample_rate_; }
31
36 uint32_t bytes_to_ms(size_t bytes) const {
37 return bytes * 1000 / (this->sample_rate_ * this->bytes_per_sample_ * this->channels_);
38 }
39
43 uint32_t bytes_to_frames(size_t bytes) const { return (bytes / (this->bytes_per_sample_ * this->channels_)); }
44
48 uint32_t bytes_to_samples(size_t bytes) const { return (bytes / this->bytes_per_sample_); }
49
53 size_t frames_to_bytes(uint32_t frames) const { return frames * this->bytes_per_sample_ * this->channels_; }
54
58 size_t samples_to_bytes(uint32_t samples) const { return samples * this->bytes_per_sample_; }
59
63 uint32_t ms_to_frames(uint32_t ms) const { return (ms * this->sample_rate_) / 1000; }
64
68 uint32_t ms_to_samples(uint32_t ms) const { return (ms * this->channels_ * this->sample_rate_) / 1000; }
69
73 size_t ms_to_bytes(uint32_t ms) const {
74 return (ms * this->bytes_per_sample_ * this->channels_ * this->sample_rate_) / 1000;
75 }
76
82
89
90 // Class comparison operators
91 bool operator==(const AudioStreamInfo &rhs) const;
92 bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }
93
94 protected:
96 uint8_t channels_;
98
99 // The greatest common divisor between 1000 ms = 1 second and the sample rate. Used to avoid accumulating error when
100 // converting from frames to duration. Computed at construction.
102
103 // Conversion factor derived from the number of bits per sample. Assumes audio data is aligned to the byte. Computed
104 // at construction.
106};
107
// Audio file types, stored in a single byte (underlying type uint8_t).
// FLAC, MP3 and OPUS are only declared when the matching USE_AUDIO_*_SUPPORT macro is defined. Only NONE has an
// explicit value, so the numeric values of the enumerators after it depend on which of those macros are defined.
108enum class AudioFileType : uint8_t {
109 NONE = 0,
110#ifdef USE_AUDIO_FLAC_SUPPORT
111 FLAC,
112#endif
113#ifdef USE_AUDIO_MP3_SUPPORT
114 MP3,
115#endif
116#ifdef USE_AUDIO_OPUS_SUPPORT
117 OPUS,
118#endif
// WAV is declared unconditionally.
119 WAV,
120};
121
122struct AudioFile {
123 const uint8_t *data;
124 size_t length;
126};
127
131const char *audio_file_type_to_string(AudioFileType file_type);
132
138AudioFileType detect_audio_file_type(const char *content_type, const char *url);
139
145void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
146 size_t samples_to_scale);
147
/// Unpacks a quantized audio sample into a Q31 fixed-point number.
/// @param data Pointer to the sample's bytes, least significant byte first. Must reference at least
///             ``bytes_per_sample`` readable bytes.
/// @param bytes_per_sample Width of the stored sample in bytes; 1 through 4 are handled.
/// @return The sample placed in the most significant bytes of a 32-bit value with the low bytes zeroed, or 0 when
///         ``bytes_per_sample`` is not 1, 2, 3, or 4.
inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample) {
  // Assemble the bit pattern in an unsigned type: a ``uint8_t`` operand is promoted to a signed ``int``, and shifting
  // a set top bit into the sign position of a signed value is not portable.
  uint32_t bits = 0;
  switch (bytes_per_sample) {
    case 1:
      bits = static_cast<uint32_t>(data[0]) << 24;
      break;
    case 2:
      bits = (static_cast<uint32_t>(data[0]) << 16) | (static_cast<uint32_t>(data[1]) << 24);
      break;
    case 3:
      bits = (static_cast<uint32_t>(data[0]) << 8) | (static_cast<uint32_t>(data[1]) << 16) |
             (static_cast<uint32_t>(data[2]) << 24);
      break;
    case 4:
      bits = static_cast<uint32_t>(data[0]) | (static_cast<uint32_t>(data[1]) << 8) |
             (static_cast<uint32_t>(data[2]) << 16) | (static_cast<uint32_t>(data[3]) << 24);
      break;
    default:
      // Unsupported width: no bytes are read and the result stays 0
      break;
  }

  // Reinterpret the assembled pattern as a signed sample
  return static_cast<int32_t>(bits);
}
172
/// Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
/// @param sample Q31 value; its most significant ``bytes_per_sample`` bytes are stored.
/// @param data Destination buffer, written least significant byte first. Must have room for at least
///             ``bytes_per_sample`` bytes.
/// @param bytes_per_sample Width of the stored sample in bytes; 1 through 4 are handled. For any other value nothing
///                         is written.
inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
  // Shift the unsigned bit pattern: right-shifting a negative signed value is implementation-defined before C++20,
  // while the bytes extracted from the unsigned pattern are always well-defined.
  const uint32_t bits = static_cast<uint32_t>(sample);
  switch (bytes_per_sample) {
    case 1:
      data[0] = static_cast<uint8_t>(bits >> 24);
      break;
    case 2:
      data[0] = static_cast<uint8_t>(bits >> 16);
      data[1] = static_cast<uint8_t>(bits >> 24);
      break;
    case 3:
      data[0] = static_cast<uint8_t>(bits >> 8);
      data[1] = static_cast<uint8_t>(bits >> 16);
      data[2] = static_cast<uint8_t>(bits >> 24);
      break;
    case 4:
      data[0] = static_cast<uint8_t>(bits);
      data[1] = static_cast<uint8_t>(bits >> 8);
      data[2] = static_cast<uint8_t>(bits >> 16);
      data[3] = static_cast<uint8_t>(bits >> 24);
      break;
    default:
      // Unsupported width: leave the destination untouched
      break;
  }
}
195
196} // namespace audio
197} // namespace esphome
bool operator!=(const AudioStreamInfo &rhs) const
Definition audio.h:92
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition audio.h:73
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition audio.h:53
uint8_t get_bits_per_sample() const
Definition audio.h:28
uint32_t frames_to_microseconds(uint32_t frames) const
Computes the duration, in microseconds, the given amount of frames represents.
Definition audio.cpp:26
uint32_t ms_to_samples(uint32_t ms) const
Converts duration to samples.
Definition audio.h:68
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition audio.h:58
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition audio.h:43
uint8_t get_channels() const
Definition audio.h:29
uint32_t ms_to_frames(uint32_t ms) const
Converts duration to frames.
Definition audio.h:63
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
Definition audio.cpp:30
bool operator==(const AudioStreamInfo &rhs) const
Definition audio.cpp:39
uint32_t bytes_to_ms(size_t bytes) const
Convert bytes to duration in milliseconds.
Definition audio.h:36
uint32_t get_sample_rate() const
Definition audio.h:30
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:48
int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample)
Unpacks a quantized audio sample into a Q31 fixed-point number.
Definition audio.h:152
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, size_t samples_to_scale)
Scales Q15 fixed point audio samples.
Definition audio.cpp:117
void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample)
Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
Definition audio.h:178
const char * audio_file_type_to_string(AudioFileType file_type)
Helper function to convert file type to a const char string.
Definition audio.cpp:44
AudioFileType detect_audio_file_type(const char *content_type, const char *url)
Detect audio file type from a Content-Type header value and/or URL extension.
Definition audio.cpp:65
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
static void uint32_t
const uint8_t * data
Definition audio.h:123
AudioFileType file_type
Definition audio.h:125