ESPHome 2025.5.0
Loading...
Searching...
No Matches
audio.h
Go to the documentation of this file.
1#pragma once
2
4
5#include <cstddef>
6#include <cstdint>
7
8namespace esphome {
9namespace audio {
10
12 /* Class to represent important parameters of the audio stream that also provides helper functions to convert between
13 * various audio related units.
14 *
15 * - An audio sample represents a unit of audio for one channel.
16 * - A frame represents a unit of audio with a sample for every channel.
17 *
18 * In general, converting between bytes, samples, and frames shouldn't result in rounding errors so long as frames
19 * are used as the main unit when transferring audio data. Durations may result in rounding for certain sample rates;
20 * e.g., 44.1 kHz. The ``frames_to_milliseconds_with_remainder`` function should be used for accuracy, as it takes
21 * into account the remainder rather than just ignoring any rounding.
22 */
23 public:
25 : AudioStreamInfo(16, 1, 16000){}; // Default values represent ESPHome's audio components historical values
26 AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate);
27
28 uint8_t get_bits_per_sample() const { return this->bits_per_sample_; }
29 uint8_t get_channels() const { return this->channels_; }
30 uint32_t get_sample_rate() const { return this->sample_rate_; }
31
36 uint32_t bytes_to_ms(size_t bytes) const {
37 return bytes * 1000 / (this->sample_rate_ * this->bytes_per_sample_ * this->channels_);
38 }
39
43 uint32_t bytes_to_frames(size_t bytes) const { return (bytes / (this->bytes_per_sample_ * this->channels_)); }
44
48 uint32_t bytes_to_samples(size_t bytes) const { return (bytes / this->bytes_per_sample_); }
49
53 size_t frames_to_bytes(uint32_t frames) const { return frames * this->bytes_per_sample_ * this->channels_; }
54
58 size_t samples_to_bytes(uint32_t samples) const { return samples * this->bytes_per_sample_; }
59
63 uint32_t ms_to_frames(uint32_t ms) const { return (ms * this->sample_rate_) / 1000; }
64
68 uint32_t ms_to_samples(uint32_t ms) const { return (ms * this->channels_ * this->sample_rate_) / 1000; }
69
73 size_t ms_to_bytes(uint32_t ms) const {
74 return (ms * this->bytes_per_sample_ * this->channels_ * this->sample_rate_) / 1000;
75 }
76
81 uint32_t frames_to_microseconds(uint32_t frames) const;
82
88 uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const;
89
90 // Class comparison operators
91 bool operator==(const AudioStreamInfo &rhs) const;
92 bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }
93
94 protected:
96 uint8_t channels_;
97 uint32_t sample_rate_;
98
99 // The greatest common divisor between 1000 ms = 1 second and the sample rate. Used to avoid accumulating error when
100 // converting from frames to duration. Computed at construction.
102
103 // Conversion factor derived from the number of bits per sample. Assumes audio data is aligned to the byte. Computed
104 // at construction.
106};
107
/// Identifies the type of an audio file. Stored in a single byte.
/// FLAC and MP3 are only present when the matching USE_AUDIO_*_SUPPORT macro is defined, so the numeric values of
/// the enumerators that follow them depend on the build configuration.
enum class AudioFileType : uint8_t {
  NONE = 0,
#if defined(USE_AUDIO_FLAC_SUPPORT)
  FLAC,
#endif
#if defined(USE_AUDIO_MP3_SUPPORT)
  MP3,
#endif
  WAV,
};
118
119struct AudioFile {
120 const uint8_t *data;
121 size_t length;
123};
124
128const char *audio_file_type_to_string(AudioFileType file_type);
129
135void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
136 size_t samples_to_scale);
137
/// @brief Unpacks a quantized audio sample into a Q31 fixed-point number.
/// @param data Pointer to the sample bytes; data[0] is the least significant byte.
/// @param bytes_per_sample Width of the sample in bytes. Only 1 through 4 are handled.
/// @return The sample left-justified in 32 bits (most significant input byte lands in bits 24-31), or 0 when
///         bytes_per_sample is outside 1-4.
inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample) {
  if (bytes_per_sample < 1 || bytes_per_sample > 4) {
    return 0;
  }

  // Number of low-order bytes of the 32-bit result that stay zero.
  const size_t padding_bytes = 4 - bytes_per_sample;

  uint32_t bits = 0;
  for (size_t i = 0; i < bytes_per_sample; ++i) {
    bits |= static_cast<uint32_t>(data[i]) << (8 * (padding_bytes + i));
  }

  return static_cast<int32_t>(bits);
}
162
/// @brief Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
/// @param sample Q31 value to pack; its most significant bytes are the ones kept.
/// @param data Destination buffer; receives bytes_per_sample bytes, least significant byte first.
/// @param bytes_per_sample Width of the output sample in bytes. Only 1 through 4 are handled; for any other value
///        nothing is written.
inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
  if (bytes_per_sample == 0 || bytes_per_sample > 4) {
    return;
  }

  // Number of low-order bytes of the Q31 value that are discarded.
  const size_t dropped_bytes = 4 - bytes_per_sample;

  for (size_t i = 0; i < bytes_per_sample; ++i) {
    data[i] = static_cast<uint8_t>(sample >> (8 * (dropped_bytes + i)));
  }
}
185
186} // namespace audio
187} // namespace esphome
bool operator!=(const AudioStreamInfo &rhs) const
Definition audio.h:92
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition audio.h:73
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition audio.h:53
uint8_t get_bits_per_sample() const
Definition audio.h:28
uint32_t frames_to_microseconds(uint32_t frames) const
Computes the duration, in microseconds, the given amount of frames represents.
Definition audio.cpp:22
uint32_t ms_to_samples(uint32_t ms) const
Converts duration to samples.
Definition audio.h:68
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition audio.h:58
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition audio.h:43
uint8_t get_channels() const
Definition audio.h:29
uint32_t ms_to_frames(uint32_t ms) const
Converts duration to frames.
Definition audio.h:63
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
Definition audio.cpp:26
bool operator==(const AudioStreamInfo &rhs) const
Definition audio.cpp:35
uint32_t bytes_to_ms(size_t bytes) const
Convert bytes to duration in milliseconds.
Definition audio.h:36
uint32_t get_sample_rate() const
Definition audio.h:30
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:48
int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample)
Unpacks a quantized audio sample into a Q31 fixed-point number.
Definition audio.h:142
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, size_t samples_to_scale)
Scales Q15 fixed point audio samples.
Definition audio.cpp:57
void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample)
Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
Definition audio.h:168
const char * audio_file_type_to_string(AudioFileType file_type)
Helper function to convert file type to a const char string.
Definition audio.cpp:40
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
const uint8_t * data
Definition audio.h:120
AudioFileType file_type
Definition audio.h:122