ESPHome 2025.5.0
Loading...
Searching...
No Matches
audio_decoder.cpp
Go to the documentation of this file.
1#include "audio_decoder.h"
2
3#ifdef USE_ESP32
4
5#include "esphome/core/hal.h"
6
7namespace esphome {
8namespace audio {
9
10static const uint32_t DECODING_TIMEOUT_MS = 50; // The decode function will yield after this duration
11static const uint32_t READ_WRITE_TIMEOUT_MS = 20; // Timeout for transferring audio data
12
13static const uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
14
// Allocates the input and output transfer buffers.
// input_buffer_size / output_buffer_size: requested sizes for the two transfer buffers.
// NOTE(review): the constructor body is not present in this listing; presumably it fills input_transfer_buffer_ and
// output_transfer_buffer_ through the transfer buffers' create(buffer_size) factories - confirm against the full file.
15AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size) {
18}
19
21#ifdef USE_AUDIO_MP3_SUPPORT
23 esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
24 }
25#endif
26}
27
// Adds a source ring buffer for raw file data.
// input_ring_buffer is handed to the input transfer buffer via set_source().
// Returns ESP_OK once the source is attached, or ESP_ERR_NO_MEM if the input transfer buffer is nullptr
// (presumably because its allocation failed earlier - confirm against the constructor).
28esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
29 if (this->input_transfer_buffer_ != nullptr) {
30 this->input_transfer_buffer_->set_source(input_ring_buffer);
31 return ESP_OK;
32 }
// There is no input transfer buffer to attach the source to
33 return ESP_ERR_NO_MEM;
34}
35
// Adds a sink ring buffer for decoded audio.
// output_ring_buffer is handed to the output transfer buffer via set_sink().
// Returns ESP_OK once the sink is attached, or ESP_ERR_NO_MEM if the output transfer buffer is nullptr
// (presumably because its allocation failed earlier - confirm against the constructor).
36esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
37 if (this->output_transfer_buffer_ != nullptr) {
38 this->output_transfer_buffer_->set_sink(output_ring_buffer);
39 return ESP_OK;
40 }
// There is no output transfer buffer to attach the sink to
41 return ESP_ERR_NO_MEM;
42}
43
44#ifdef USE_SPEAKER
46 if (this->output_transfer_buffer_ != nullptr) {
47 this->output_transfer_buffer_->set_sink(speaker);
48 return ESP_OK;
49 }
50 return ESP_ERR_NO_MEM;
51}
52#endif
53
// Sets up decoding the file.
// audio_file_type selects which decoder-specific setup in the switch below is run.
// Returns ESP_ERR_NO_MEM if either transfer buffer is nullptr, ESP_ERR_NOT_SUPPORTED when the switch reaches its
// default branch, and ESP_OK otherwise.
// NOTE(review): this listing is missing several lines of the function (the case labels of the switch and a few
// statements), so the per-format notes below only describe the statements that are visible - confirm against the
// full file.
54esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
// Both transfer buffers are dereferenced below, so bail out early if either one is missing
55 if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
56 return ESP_ERR_NO_MEM;
57 }
58
59 this->audio_file_type_ = audio_file_type;
60
// Clear the end-of-file flag left over from any previous run
62 this->end_of_file_ = false;
63
64 switch (this->audio_file_type_) {
65#ifdef USE_AUDIO_FLAC_SUPPORT
// Presumably the FLAC branch; the start of this statement is not visible in the listing
69 this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
70 break;
71#endif
72#ifdef USE_AUDIO_MP3_SUPPORT
// MP3 branch: creates the Helix decoder instance and sizes the output buffer for exactly one decoded chunk
74 this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
75
76 // MP3 always has 1152 samples per chunk
77 this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
78
79 // Always reallocate the output transfer buffer to the smallest necessary size
80 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
81 break;
82#endif
// Presumably the WAV branch; the case label and the preceding statement are not visible in the listing
85 this->wav_decoder_->reset();
86
87 // Processing WAVs doesn't actually require a specific amount of buffer size, as it is already in PCM format.
88 // Thus, we don't reallocate to a minimum size.
89 this->free_buffer_required_ = 1024;
// Only grow the output buffer when its current capacity is below the required free space
90 if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
91 this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
92 }
93 break;
95 default:
96 return ESP_ERR_NOT_SUPPORTED;
// Unreachable: the return above already leaves the function
97 break;
98 }
99
100 return ESP_OK;
101}
102
104 if (stop_gracefully) {
105 if (this->output_transfer_buffer_->available() == 0) {
106 if (this->end_of_file_) {
107 // The file decoder indicates it reached the end of file
109 }
110
111 if (!this->input_transfer_buffer_->has_buffered_data()) {
112 // If all the internal buffers are empty, the decoding is done
114 }
115 }
116 }
117
118 if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
119 if (stop_gracefully) {
120 // No more new data is going to come in, so decoding is done
122 }
124 }
125
127
128 uint32_t decoding_start = millis();
129
130 bool first_loop_iteration = true;
131
132 size_t bytes_processed = 0;
133 size_t bytes_available_before_processing = 0;
134
136 // Transfer decoded out
137 if (!this->pause_output_) {
138 // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
139 size_t bytes_written =
140 this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
141
142 if (this->audio_stream_info_.has_value()) {
143 this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
144 this->playback_ms_ +=
145 this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
146 }
147 } else {
148 // If paused, block to avoid wasting CPU resources
149 delay(READ_WRITE_TIMEOUT_MS);
150 }
151
152 // Verify there is enough space to store more decoded audio and that the function hasn't been running too long
153 if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
154 (millis() - decoding_start > DECODING_TIMEOUT_MS)) {
156 }
157
158 // Decode more audio
159
160 // Only shift data on the first loop iteration to avoid unnecessary, slow moves
161 size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS),
162 first_loop_iteration);
163
164 if (!first_loop_iteration && (this->input_transfer_buffer_->available() < bytes_processed)) {
165 // Less data is available than what was processed in last iteration, so don't attempt to decode.
166 // This attempts to keep the decoder from consistently trying to decode an incomplete frame. The transfer buffer
167 // will shift the remaining data to the start and copy more from the source the next time the decode function is
168 // called
169 break;
170 }
171
172 bytes_available_before_processing = this->input_transfer_buffer_->available();
173
174 if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
175 // Failed to decode in last attempt and there is no new data
176
177 if ((this->input_transfer_buffer_->free() == 0) && first_loop_iteration) {
178 // The input buffer is full. Since it previously failed on the exact same data, we can never recover
180 } else {
181 // Attempt to get more data next time
183 }
184 } else if (this->input_transfer_buffer_->available() == 0) {
185 // No data to decode, attempt to get more data next time
187 } else {
188 switch (this->audio_file_type_) {
189#ifdef USE_AUDIO_FLAC_SUPPORT
191 state = this->decode_flac_();
192 break;
193#endif
194#ifdef USE_AUDIO_MP3_SUPPORT
196 state = this->decode_mp3_();
197 break;
198#endif
200 state = this->decode_wav_();
201 break;
203 default:
205 break;
206 }
207 }
208
209 first_loop_iteration = false;
210 bytes_processed = bytes_available_before_processing - this->input_transfer_buffer_->available();
211
214 } else if (state == FileDecoderState::END_OF_FILE) {
215 this->end_of_file_ = true;
216 } else if (state == FileDecoderState::FAILED) {
220 }
221 }
223}
224
225#ifdef USE_AUDIO_FLAC_SUPPORT
227 if (!this->audio_stream_info_.has_value()) {
228 // Header hasn't been read
229 auto result = this->flac_decoder_->read_header(this->input_transfer_buffer_->get_buffer_start(),
230 this->input_transfer_buffer_->available());
231
232 if (result == esp_audio_libs::flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
234 }
235
236 if (result != esp_audio_libs::flac::FLAC_DECODER_SUCCESS) {
237 // Couldn't read FLAC header
239 }
240
241 size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
242 this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
243
244 // Reallocate the output transfer buffer to the smallest necessary size
245 this->free_buffer_required_ = flac_decoder_->get_output_buffer_size_bytes();
246 if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
247 // Couldn't reallocate output buffer
249 }
250
251 this->audio_stream_info_ =
252 audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
253 this->flac_decoder_->get_sample_rate());
254
256 }
257
258 uint32_t output_samples = 0;
259 auto result = this->flac_decoder_->decode_frame(
260 this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available(),
261 reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end()), &output_samples);
262
263 if (result == esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
264 // Not an issue, just needs more data that we'll get next time.
266 }
267
268 size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
269 this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
270
271 if (result > esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
272 // Corrupted frame, don't retry with current buffer content, wait for new sync
274 }
275
276 // We have successfully decoded some input data and have new output data
277 this->output_transfer_buffer_->increase_buffer_length(
278 this->audio_stream_info_.value().samples_to_bytes(output_samples));
279
280 if (result == esp_audio_libs::flac::FLAC_DECODER_NO_MORE_FRAMES) {
282 }
283
285}
286#endif
287
288#ifdef USE_AUDIO_MP3_SUPPORT
290 // Look for the next sync word
291 int buffer_length = (int) this->input_transfer_buffer_->available();
292 int32_t offset =
293 esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_transfer_buffer_->get_buffer_start(), buffer_length);
294
295 if (offset < 0) {
296 // New data may have the sync word
297 this->input_transfer_buffer_->decrease_buffer_length(buffer_length);
299 }
300
301 // Advance read pointer to match the offset for the syncword
302 this->input_transfer_buffer_->decrease_buffer_length(offset);
303 uint8_t *buffer_start = this->input_transfer_buffer_->get_buffer_start();
304
305 buffer_length = (int) this->input_transfer_buffer_->available();
306 int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
307 (int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
308
309 size_t consumed = this->input_transfer_buffer_->available() - buffer_length;
310 this->input_transfer_buffer_->decrease_buffer_length(consumed);
311
312 if (err) {
313 switch (err) {
314 case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
315 // Intentional fallthrough
316 case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
318 break;
319 default:
320 // Most errors are recoverable by moving on to the next frame, so mark as potentially failed
322 break;
323 }
324 } else {
325 esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
326 esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
327 if (mp3_frame_info.outputSamps > 0) {
328 int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
329 this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
330
331 if (!this->audio_stream_info_.has_value()) {
332 this->audio_stream_info_ =
333 audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
334 }
335 }
336 }
337
339}
340#endif
341
343 if (!this->audio_stream_info_.has_value()) {
344 // Header hasn't been processed
345
346 esp_audio_libs::wav_decoder::WAVDecoderResult result = this->wav_decoder_->decode_header(
347 this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available());
348
349 if (result == esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
350 this->input_transfer_buffer_->decrease_buffer_length(this->wav_decoder_->bytes_processed());
351
353 this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());
354
355 this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
356 this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
358 } else if (result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
359 // Available data didn't have the full header
361 } else {
363 }
364 } else {
365 if (!this->wav_has_known_end_ || (this->wav_bytes_left_ > 0)) {
366 size_t bytes_to_copy = this->input_transfer_buffer_->available();
367
368 if (this->wav_has_known_end_) {
369 bytes_to_copy = std::min(bytes_to_copy, this->wav_bytes_left_);
370 }
371
372 bytes_to_copy = std::min(bytes_to_copy, this->output_transfer_buffer_->free());
373
374 if (bytes_to_copy > 0) {
375 std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(),
376 bytes_to_copy);
377 this->input_transfer_buffer_->decrease_buffer_length(bytes_to_copy);
378 this->output_transfer_buffer_->increase_buffer_length(bytes_to_copy);
379 if (this->wav_has_known_end_) {
380 this->wav_bytes_left_ -= bytes_to_copy;
381 }
382 }
384 }
385 }
386
388}
389
390} // namespace audio
391} // namespace esphome
392
393#endif
optional< AudioStreamInfo > audio_stream_info_
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
std::unique_ptr< esp_audio_libs::flac::FLACDecoder > flac_decoder_
~AudioDecoder()
Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically)
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
FileDecoderState decode_flac_()
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
bool state
Definition fan.h:0
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::unique_ptr< T > make_unique(Args &&...args)
Definition helpers.h:85
void IRAM_ATTR HOT delay(uint32_t ms)
Definition core.cpp:28
uint32_t IRAM_ATTR HOT millis()
Definition core.cpp:27