ESPHome 2026.2.1
Loading...
Searching...
No Matches
mixer_speaker.cpp
Go to the documentation of this file.
1#include "mixer_speaker.h"
2
3#ifdef USE_ESP32
4
6#include "esphome/core/hal.h"
8#include "esphome/core/log.h"
9
10#include <algorithm>
11#include <array>
12#include <cstring>
13
14namespace esphome {
15namespace mixer_speaker {
16
// Priority passed to xTaskCreateStatic() when the mixer task is created
17static const UBaseType_t MIXER_TASK_PRIORITY = 10;
18
// Time after entering the stopping state before the output speaker is force stopped
19static const uint32_t STOPPING_TIMEOUT_MS = 5000;
// Duration used to size the mixer task's output transfer buffer (converted with ms_to_bytes)
20static const uint32_t TRANSFER_BUFFER_DURATION_MS = 50;
// Wait used by the mixer task when writing to the sink and when no source has audio available
21static const uint32_t TASK_DELAY_MS = 25;
22
// Size passed to the stack allocator and to xTaskCreateStatic() for the mixer task's stack
23static const size_t TASK_STACK_SIZE = 4096;
24
// Saturation bounds applied to the 32-bit sum when two 16-bit samples are mixed
25static const int16_t MAX_AUDIO_SAMPLE_VALUE = INT16_MAX;
26static const int16_t MIN_AUDIO_SAMPLE_VALUE = INT16_MIN;
27
// Tag passed to the ESP_LOG* macros in this file
28static const char *const TAG = "speaker_mixer";
29
30// Gives the Q15 fixed-point scaling factors to reduce by 0 dB, 1 dB, ..., 50 dB
31// dB to PCM scaling factor formula: floating_point_scale_factor = 2^(-db/6.014)
32// float to Q15 fixed point formula: q15_scale_factor = floating_point_scale_factor * 2^(15)
33static const std::array<int16_t, 51> DECIBEL_REDUCTION_TABLE = {
34 32767, 29201, 26022, 23189, 20665, 18415, 16410, 14624, 13032, 11613, 10349, 9222, 8218, 7324, 6527, 5816, 5183,
35 4619, 4116, 3668, 3269, 2913, 2596, 2313, 2061, 1837, 1637, 1459, 1300, 1158, 1032, 920, 820, 731,
36 651, 580, 517, 461, 411, 366, 326, 291, 259, 231, 206, 183, 163, 146, 130, 116, 103};
37
38// Event bits for SourceSpeaker command processing
44
45// Event bits for mixer task control and state
46enum MixerTaskEventBits : uint32_t {
54 MIXER_TASK_ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
55};
56
// Atomically subtracts up to `amount` from `var` without letting it go below zero.
//
// @param var     Counter to decrement.
// @param amount  Largest value to subtract.
// @return The value that was actually subtracted: min(amount, value of `var` at the time of the exchange).
static inline uint32_t atomic_subtract_clamped(std::atomic<uint32_t> &var, uint32_t amount) {
  uint32_t observed = var.load(std::memory_order_acquire);
  if (observed == 0) {
    // Nothing to subtract from, so the counter is left untouched
    return 0;
  }

  for (;;) {
    const uint32_t taken = std::min(amount, observed);
    // On failure compare_exchange_weak refreshes `observed` with the current value, so the clamp is recomputed
    if (var.compare_exchange_weak(observed, observed - taken, std::memory_order_release,
                                  std::memory_order_acquire)) {
      return taken;
    }
  }
}
69
70static bool create_event_group(EventGroupHandle_t &event_group, Component *component) {
71 event_group = xEventGroupCreate();
72 if (event_group == nullptr) {
73 ESP_LOGE(TAG, "Failed to create event group");
74 component->mark_failed();
75 return false;
76 }
77 return true;
78}
79
81 ESP_LOGCONFIG(TAG,
82 "Mixer Source Speaker\n"
83 " Buffer Duration: %" PRIu32 " ms",
85 if (this->timeout_ms_.has_value()) {
86 ESP_LOGCONFIG(TAG, " Timeout: %" PRIu32 " ms", this->timeout_ms_.value());
87 } else {
88 ESP_LOGCONFIG(TAG, " Timeout: never");
89 }
90}
91
93 if (!create_event_group(this->event_group_, this)) {
94 return;
95 }
96
97 // Start with loop disabled since we begin in STATE_STOPPED with no pending commands
98 this->disable_loop();
99
100 this->parent_->get_output_speaker()->add_audio_output_callback([this](uint32_t new_frames, int64_t write_timestamp) {
101 // First, drain the playback delay (frames in pipeline before this source started contributing)
102 uint32_t delay_to_drain = atomic_subtract_clamped(this->playback_delay_frames_, new_frames);
103 uint32_t remaining_frames = new_frames - delay_to_drain;
104
105 // Then, count towards this source's pending playback frames
106 if (remaining_frames > 0) {
107 uint32_t speakers_playback_frames = atomic_subtract_clamped(this->pending_playback_frames_, remaining_frames);
108 if (speakers_playback_frames > 0) {
109 this->audio_output_callback_(speakers_playback_frames, write_timestamp);
110 }
111 }
112 });
113}
114
116 uint32_t event_bits = xEventGroupGetBits(this->event_group_);
117
118 // Process commands with priority: STOP > FINISH > START
119 // This ensures stop commands take precedence over conflicting start commands
120 if (event_bits & SOURCE_SPEAKER_COMMAND_STOP) {
121 if (this->state_ == speaker::STATE_RUNNING) {
122 // Clear both STOP and START bits - stop takes precedence
123 xEventGroupClearBits(this->event_group_, SOURCE_SPEAKER_COMMAND_STOP | SOURCE_SPEAKER_COMMAND_START);
124 this->enter_stopping_state_();
125 } else if (this->state_ == speaker::STATE_STOPPED) {
126 // Already stopped, just clear the command bits
127 xEventGroupClearBits(this->event_group_, SOURCE_SPEAKER_COMMAND_STOP | SOURCE_SPEAKER_COMMAND_START);
128 }
129 // Leave bits set if transitioning states (STARTING/STOPPING) - will be processed once state allows
130 } else if (event_bits & SOURCE_SPEAKER_COMMAND_FINISH) {
131 if (this->state_ == speaker::STATE_RUNNING) {
132 xEventGroupClearBits(this->event_group_, SOURCE_SPEAKER_COMMAND_FINISH);
133 this->stop_gracefully_ = true;
134 } else if (this->state_ == speaker::STATE_STOPPED) {
135 // Already stopped, just clear the command bit
136 xEventGroupClearBits(this->event_group_, SOURCE_SPEAKER_COMMAND_FINISH);
137 }
138 // Leave bit set if transitioning states - will be processed once state allows
139 } else if (event_bits & SOURCE_SPEAKER_COMMAND_START) {
140 if (this->state_ == speaker::STATE_STOPPED) {
141 xEventGroupClearBits(this->event_group_, SOURCE_SPEAKER_COMMAND_START);
143 } else if (this->state_ == speaker::STATE_RUNNING) {
144 // Already running, just clear the command bit
145 xEventGroupClearBits(this->event_group_, SOURCE_SPEAKER_COMMAND_START);
146 }
147 // Leave bit set if transitioning states - will be processed once state allows
148 }
149 // Process state machine
150 switch (this->state_) {
152 esp_err_t err = this->start_();
153 if (err == ESP_OK) {
154 this->pending_playback_frames_.store(0, std::memory_order_release); // reset pending playback frames
155 this->playback_delay_frames_.store(0, std::memory_order_release); // reset playback delay
156 this->has_contributed_.store(false, std::memory_order_release); // reset contribution tracking
158 this->stop_gracefully_ = false;
159 this->last_seen_data_ms_ = millis();
160 this->status_clear_error();
161 } else {
162 switch (err) {
163 case ESP_ERR_NO_MEM:
164 this->status_set_error(LOG_STR("Not enough memory"));
165 break;
166 case ESP_ERR_NOT_SUPPORTED:
167 this->status_set_error(LOG_STR("Unsupported bit depth"));
168 break;
169 case ESP_ERR_INVALID_ARG:
170 this->status_set_error(LOG_STR("Incompatible audio streams"));
171 break;
172 case ESP_ERR_INVALID_STATE:
173 this->status_set_error(LOG_STR("Task failed"));
174 break;
175 default:
176 this->status_set_error(LOG_STR("Failed"));
177 break;
178 }
179
180 this->enter_stopping_state_();
181 }
182 break;
183 }
185 if (!this->transfer_buffer_->has_buffered_data() &&
186 (this->pending_playback_frames_.load(std::memory_order_acquire) == 0)) {
187 // No audio data in buffer waiting to get mixed and no frames are pending playback
188 if ((this->timeout_ms_.has_value() && ((millis() - this->last_seen_data_ms_) > this->timeout_ms_.value())) ||
189 this->stop_gracefully_) {
190 // Timeout exceeded or graceful stop requested
191 this->enter_stopping_state_();
192 }
193 }
194 break;
196 if ((this->parent_->get_output_speaker()->get_pause_state()) ||
197 ((millis() - this->stopping_start_ms_) > STOPPING_TIMEOUT_MS)) {
198 // If parent speaker is paused or if the stopping timeout is exceeded, force stop the output speaker
199 this->parent_->get_output_speaker()->stop();
200 }
201
202 if (this->parent_->get_output_speaker()->is_stopped() ||
203 (this->pending_playback_frames_.load(std::memory_order_acquire) == 0)) {
204 // Output speaker is stopped OR all pending playback frames have played
205 this->pending_playback_frames_.store(0, std::memory_order_release);
206 this->stop_gracefully_ = false;
207
209 }
210 break;
211 }
213 // Re-check event bits for any new commands that may have arrived
214 event_bits = xEventGroupGetBits(this->event_group_);
215 if (!(event_bits &
217 // No pending commands, disable loop to save CPU cycles
218 this->disable_loop();
219 }
220 break;
221 }
222}
223
224size_t SourceSpeaker::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
225 if (this->is_stopped()) {
226 this->start();
227 }
228 size_t bytes_written = 0;
229 std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_.lock();
230 if (temp_ring_buffer.use_count() > 0) {
231 // Only write to the ring buffer if the reference is valid
232 bytes_written = temp_ring_buffer->write_without_replacement(data, length, ticks_to_wait);
233 if (bytes_written > 0) {
234 this->last_seen_data_ms_ = millis();
235 }
236 } else {
237 // Delay to avoid repeatedly hammering while waiting for the speaker to start
238 vTaskDelay(ticks_to_wait);
239 }
240 return bytes_written;
241}
242
// Sets `command_bit` in this speaker's event group if it is not already set.
// The optional wake-up is only compiled in when both USE_SOCKET_SELECT_SUPPORT and USE_WAKE_LOOP_THREADSAFE
// are defined.
// NOTE(review): the statements from listing lines 244 and 250 are absent from this extract; presumably they
// enable the component loop and wake the main loop when `wake_loop` is true - confirm against the full source.
243void SourceSpeaker::send_command_(uint32_t command_bit, bool wake_loop) {
245 uint32_t event_bits = xEventGroupGetBits(this->event_group_);
// Only set the bit (and optionally wake the loop) when the command is not already pending
246 if (!(event_bits & command_bit)) {
247 xEventGroupSetBits(this->event_group_, command_bit);
248#if defined(USE_SOCKET_SELECT_SUPPORT) && defined(USE_WAKE_LOOP_THREADSAFE)
249 if (wake_loop) {
251 }
252#endif
253 }
254}
255
257
259 const size_t ring_buffer_size = this->audio_stream_info_.ms_to_bytes(this->buffer_duration_ms_);
260 if (this->transfer_buffer_.use_count() == 0) {
261 this->transfer_buffer_ =
263
264 if (this->transfer_buffer_ == nullptr) {
265 return ESP_ERR_NO_MEM;
266 }
267
268 std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_.lock();
269 if (!temp_ring_buffer) {
270 temp_ring_buffer = RingBuffer::create(ring_buffer_size);
271 this->ring_buffer_ = temp_ring_buffer;
272 }
273
274 if (!temp_ring_buffer) {
275 return ESP_ERR_NO_MEM;
276 } else {
277 this->transfer_buffer_->set_source(temp_ring_buffer);
278 }
279 }
280
281 return this->parent_->start(this->audio_stream_info_);
282}
283
285
287
289 return ((this->transfer_buffer_.use_count() > 0) && this->transfer_buffer_->has_buffered_data());
290}
291
292void SourceSpeaker::set_mute_state(bool mute_state) {
293 this->mute_state_ = mute_state;
294 this->parent_->get_output_speaker()->set_mute_state(mute_state);
295}
296
298
299void SourceSpeaker::set_volume(float volume) {
300 this->volume_ = volume;
301 this->parent_->get_output_speaker()->set_volume(volume);
302}
303
305
// Pulls new audio from the source into `transfer_buffer` and ducks only the newly transferred samples.
//
// @param transfer_buffer  Transfer buffer to fill; data already in it is left untouched.
// @param ticks_to_wait    Passed through to transfer_data_from_source().
// @return Number of bytes transferred from the source.
// NOTE(review): the remaining duck_samples() arguments (listing lines 318-319) are absent from this extract.
306size_t SourceSpeaker::process_data_from_source(std::shared_ptr<audio::AudioSourceTransferBuffer> &transfer_buffer,
307 TickType_t ticks_to_wait) {
308 // Store current offset, as these samples are already ducked
309 const size_t current_length = transfer_buffer->available();
310
311 size_t bytes_read = transfer_buffer->transfer_data_from_source(ticks_to_wait);
312
313 uint32_t samples_to_duck = this->audio_stream_info_.bytes_to_samples(bytes_read);
314 if (samples_to_duck > 0) {
// Start ducking at the first sample that was not in the buffer before this transfer
315 int16_t *current_buffer = reinterpret_cast<int16_t *>(transfer_buffer->get_buffer_start() + current_length);
316
317 duck_samples(current_buffer, samples_to_duck, &this->current_ducking_db_reduction_,
320 }
321
322 return bytes_read;
323}
324
// Sets a new target dB reduction and the timing of the transition towards it.
// Does nothing when `decibel_reduction` already equals the current target. Otherwise the number of intermediate
// steps is the distance between the current and target reduction minus one, and when both `duration` and that
// step count are non-zero the remaining transition samples are divided evenly across the steps.
// NOTE(review): several statements of this function are absent from this extract (the listing numbers skip),
// including how `duration` is converted to samples and what the final else branch does - confirm against the
// full source. The unit of `duration` is presumably milliseconds; verify.
325void SourceSpeaker::apply_ducking(uint8_t decibel_reduction, uint32_t duration) {
326 if (this->target_ducking_db_reduction_ != decibel_reduction) {
327 // Start transition from the previous target (which becomes the new current level)
329
330 this->target_ducking_db_reduction_ = decibel_reduction;
331
332 // Calculate the number of intermediate dB steps for the transition timing.
333 // Subtract 1 because the first step is taken immediately after this calculation.
334 uint8_t total_ducking_steps = 0;
336 // The dB reduction level is increasing (which results in quieter audio)
337 total_ducking_steps = this->target_ducking_db_reduction_ - this->current_ducking_db_reduction_ - 1;
339 } else {
340 // The dB reduction level is decreasing (which results in louder audio)
341 total_ducking_steps = this->current_ducking_db_reduction_ - this->target_ducking_db_reduction_ - 1;
343 }
344 if ((duration > 0) && (total_ducking_steps > 0)) {
346
347 this->samples_per_ducking_step_ = this->ducking_transition_samples_remaining_ / total_ducking_steps;
349 this->samples_per_ducking_step_ * total_ducking_steps; // adjust for integer division rounding
350
352 } else {
355 }
356 }
357}
358
359void SourceSpeaker::duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck,
360 int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining,
361 uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step) {
362 if (*ducking_transition_samples_remaining > 0) {
363 // Ducking level is still transitioning
364
365 // Takes the ceiling of input_samples_to_duck/samples_per_ducking_step
366 uint32_t ducking_steps_in_batch =
367 input_samples_to_duck / samples_per_ducking_step + (input_samples_to_duck % samples_per_ducking_step != 0);
368
369 for (uint32_t i = 0; i < ducking_steps_in_batch; ++i) {
370 uint32_t samples_left_in_step = *ducking_transition_samples_remaining % samples_per_ducking_step;
371
372 if (samples_left_in_step == 0) {
373 samples_left_in_step = samples_per_ducking_step;
374 }
375
376 uint32_t samples_to_duck = std::min(input_samples_to_duck, samples_left_in_step);
377 samples_to_duck = std::min(samples_to_duck, *ducking_transition_samples_remaining);
378
379 // Ensure we only point to valid index in the Q15 scaling factor table
380 uint8_t safe_db_reduction_index =
381 clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
382 int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
383
384 audio::scale_audio_samples(input_buffer, input_buffer, q15_scale_factor, samples_to_duck);
385
386 if (samples_left_in_step - samples_to_duck == 0) {
387 // After scaling the current samples, we are ready to transition to the next step
388 *current_ducking_db_reduction += db_change_per_ducking_step;
389 }
390
391 input_buffer += samples_to_duck;
392 *ducking_transition_samples_remaining -= samples_to_duck;
393 input_samples_to_duck -= samples_to_duck;
394 }
395 }
396
397 if ((*current_ducking_db_reduction > 0) && (input_samples_to_duck > 0)) {
398 // Audio is ducked, but its not in the middle of a transition step
399
400 uint8_t safe_db_reduction_index =
401 clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
402 int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
403
404 audio::scale_audio_samples(input_buffer, input_buffer, q15_scale_factor, input_samples_to_duck);
405 }
406}
407
413
415 ESP_LOGCONFIG(TAG,
416 "Speaker Mixer:\n"
417 " Number of output channels: %u",
418 this->output_channels_);
419}
420
422 if (!create_event_group(this->event_group_, this)) {
423 return;
424 }
425
426 // Register callback to track frames in the output pipeline
427 this->output_speaker_->add_audio_output_callback([this](uint32_t new_frames, int64_t write_timestamp) {
428 atomic_subtract_clamped(this->frames_in_pipeline_, new_frames);
429 });
430
431 // Start with loop disabled since no task is running and no commands are pending
432 this->disable_loop();
433}
434
436 uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
437
438 // Handle pending start request
439 if (event_group_bits & MIXER_TASK_COMMAND_START) {
440 // Only start the task if it's fully stopped and cleaned up
441 if (!this->status_has_error() && (this->task_handle_ == nullptr) && (this->task_stack_buffer_ == nullptr)) {
442 esp_err_t err = this->start_task_();
443 switch (err) {
444 case ESP_OK:
445 xEventGroupClearBits(this->event_group_, MIXER_TASK_COMMAND_START);
446 break;
447 case ESP_ERR_NO_MEM:
448 ESP_LOGE(TAG, "Failed to start; retrying in 1 second");
449 this->status_momentary_error("memory-failure", 1000);
450 return;
451 case ESP_ERR_INVALID_STATE:
452 ESP_LOGE(TAG, "Failed to start; retrying in 1 second");
453 this->status_momentary_error("task-failure", 1000);
454 return;
455 default:
456 ESP_LOGE(TAG, "Failed to start; retrying in 1 second");
457 this->status_momentary_error("failure", 1000);
458 return;
459 }
460 }
461 }
462
463 if (event_group_bits & MIXER_TASK_STATE_STARTING) {
464 ESP_LOGD(TAG, "Starting");
465 xEventGroupClearBits(this->event_group_, MIXER_TASK_STATE_STARTING);
466 }
467 if (event_group_bits & MIXER_TASK_ERR_ESP_NO_MEM) {
468 this->status_set_error(LOG_STR("Not enough memory"));
469 xEventGroupClearBits(this->event_group_, MIXER_TASK_ERR_ESP_NO_MEM);
470 }
471 if (event_group_bits & MIXER_TASK_STATE_RUNNING) {
472 ESP_LOGV(TAG, "Started");
473 this->status_clear_error();
474 xEventGroupClearBits(this->event_group_, MIXER_TASK_STATE_RUNNING);
475 }
476 if (event_group_bits & MIXER_TASK_STATE_STOPPING) {
477 ESP_LOGV(TAG, "Stopping");
478 xEventGroupClearBits(this->event_group_, MIXER_TASK_STATE_STOPPING);
479 }
480 if (event_group_bits & MIXER_TASK_STATE_STOPPED) {
481 if (this->delete_task_() == ESP_OK) {
482 ESP_LOGD(TAG, "Stopped");
483 xEventGroupClearBits(this->event_group_, MIXER_TASK_ALL_BITS);
484 }
485 }
486
487 if (this->task_handle_ != nullptr) {
488 // If the mixer task is running, check if all source speakers are stopped
489
490 bool all_stopped = true;
491
492 for (auto &speaker : this->source_speakers_) {
493 all_stopped &= speaker->is_stopped();
494 }
495
496 if (all_stopped) {
497 // Send stop command signal to the mixer task since no source speakers are active
498 xEventGroupSetBits(this->event_group_, MIXER_TASK_COMMAND_STOP);
499 }
500 } else if (this->task_stack_buffer_ == nullptr) {
501 // Task is fully stopped and cleaned up, check if we can disable loop
502 event_group_bits = xEventGroupGetBits(this->event_group_);
503 if (event_group_bits == 0) {
504 // No pending events, disable loop to save CPU cycles
505 this->disable_loop();
506 }
507 }
508}
509
511 if (!this->audio_stream_info_.has_value()) {
512 if (stream_info.get_bits_per_sample() != 16) {
513 // Audio streams that don't have 16 bits per sample are not supported
514 return ESP_ERR_NOT_SUPPORTED;
515 }
516
517 this->audio_stream_info_ = audio::AudioStreamInfo(stream_info.get_bits_per_sample(), this->output_channels_,
518 stream_info.get_sample_rate());
520 } else {
521 if (!this->queue_mode_ && (stream_info.get_sample_rate() != this->audio_stream_info_.value().get_sample_rate())) {
522 // The two audio streams must have the same sample rate to mix properly if not in queue mode
523 return ESP_ERR_INVALID_ARG;
524 }
525 }
526
527 this->enable_loop_soon_any_context(); // ensure loop processes command
528
529 uint32_t event_bits = xEventGroupGetBits(this->event_group_);
530 if (!(event_bits & MIXER_TASK_COMMAND_START)) {
531 // Set MIXER_TASK_COMMAND_START bit if not already set, and then immediately wake for low latency
532 xEventGroupSetBits(this->event_group_, MIXER_TASK_COMMAND_START);
533#if defined(USE_SOCKET_SELECT_SUPPORT) && defined(USE_WAKE_LOOP_THREADSAFE)
535#endif
536 }
537
538 return ESP_OK;
539}
540
542 if (this->task_stack_buffer_ == nullptr) {
543 if (this->task_stack_in_psram_) {
545 this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
546 } else {
548 this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
549 }
550 }
551
552 if (this->task_stack_buffer_ == nullptr) {
553 return ESP_ERR_NO_MEM;
554 }
555
556 if (this->task_handle_ == nullptr) {
557 this->task_handle_ = xTaskCreateStatic(audio_mixer_task, "mixer", TASK_STACK_SIZE, (void *) this,
558 MIXER_TASK_PRIORITY, this->task_stack_buffer_, &this->task_stack_);
559 }
560
561 if (this->task_handle_ == nullptr) {
562 return ESP_ERR_INVALID_STATE;
563 }
564
565 return ESP_OK;
566}
567
569 if (this->task_handle_ != nullptr) {
570 // Delete the task
571 vTaskDelete(this->task_handle_);
572 this->task_handle_ = nullptr;
573 }
574
575 if ((this->task_handle_ == nullptr) && (this->task_stack_buffer_ != nullptr)) {
576 // Deallocate the task stack buffer
577 if (this->task_stack_in_psram_) {
579 stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
580 } else {
582 stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
583 }
584
585 this->task_stack_buffer_ = nullptr;
586 }
587
588 if ((this->task_handle_ != nullptr) || (this->task_stack_buffer_ != nullptr)) {
589 return ESP_ERR_INVALID_STATE;
590 }
591
592 return ESP_OK;
593}
594
595void MixerSpeaker::copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info,
596 int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
597 uint32_t frames_to_transfer) {
598 uint8_t input_channels = input_stream_info.get_channels();
599 uint8_t output_channels = output_stream_info.get_channels();
600 const uint8_t max_input_channel_index = input_channels - 1;
601
602 if (input_channels == output_channels) {
603 size_t bytes_to_copy = input_stream_info.frames_to_bytes(frames_to_transfer);
604 memcpy(output_buffer, input_buffer, bytes_to_copy);
605
606 return;
607 }
608
609 for (uint32_t frame_index = 0; frame_index < frames_to_transfer; ++frame_index) {
610 for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
611 uint8_t input_channel_index = std::min(output_channel_index, max_input_channel_index);
612 output_buffer[output_channels * frame_index + output_channel_index] =
613 input_buffer[input_channels * frame_index + input_channel_index];
614 }
615 }
616}
617
618void MixerSpeaker::mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info,
619 const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info,
620 int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
621 uint32_t frames_to_mix) {
622 const uint8_t primary_channels = primary_stream_info.get_channels();
623 const uint8_t secondary_channels = secondary_stream_info.get_channels();
624 const uint8_t output_channels = output_stream_info.get_channels();
625
626 const uint8_t max_primary_channel_index = primary_channels - 1;
627 const uint8_t max_secondary_channel_index = secondary_channels - 1;
628
629 for (uint32_t frames_index = 0; frames_index < frames_to_mix; ++frames_index) {
630 for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
631 const uint32_t secondary_channel_index = std::min(output_channel_index, max_secondary_channel_index);
632 const int32_t secondary_sample = secondary_buffer[frames_index * secondary_channels + secondary_channel_index];
633
634 const uint32_t primary_channel_index = std::min(output_channel_index, max_primary_channel_index);
635 const int32_t primary_sample =
636 static_cast<int32_t>(primary_buffer[frames_index * primary_channels + primary_channel_index]);
637
638 const int32_t added_sample = secondary_sample + primary_sample;
639
640 output_buffer[frames_index * output_channels + output_channel_index] =
641 static_cast<int16_t>(clamp<int32_t>(added_sample, MIN_AUDIO_SAMPLE_VALUE, MAX_AUDIO_SAMPLE_VALUE));
642 }
643 }
644}
645
647 MixerSpeaker *this_mixer = static_cast<MixerSpeaker *>(params);
648
649 xEventGroupSetBits(this_mixer->event_group_, MIXER_TASK_STATE_STARTING);
650
651 std::unique_ptr<audio::AudioSinkTransferBuffer> output_transfer_buffer = audio::AudioSinkTransferBuffer::create(
652 this_mixer->audio_stream_info_.value().ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
653
654 if (output_transfer_buffer == nullptr) {
655 xEventGroupSetBits(this_mixer->event_group_, MIXER_TASK_STATE_STOPPED | MIXER_TASK_ERR_ESP_NO_MEM);
656
657 vTaskSuspend(nullptr); // Suspend this task indefinitely until the loop method deletes it
658 }
659
660 output_transfer_buffer->set_sink(this_mixer->output_speaker_);
661
662 xEventGroupSetBits(this_mixer->event_group_, MIXER_TASK_STATE_RUNNING);
663
664 bool sent_finished = false;
665
666 // Pre-allocate vectors to avoid heap allocation in the loop (max 8 source speakers per schema)
667 FixedVector<SourceSpeaker *> speakers_with_data;
669 speakers_with_data.init(this_mixer->source_speakers_.size());
670 transfer_buffers_with_data.init(this_mixer->source_speakers_.size());
671
672 while (true) {
673 uint32_t event_group_bits = xEventGroupGetBits(this_mixer->event_group_);
674 if (event_group_bits & MIXER_TASK_COMMAND_STOP) {
675 break;
676 }
677
678 // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves
679 output_transfer_buffer->transfer_data_to_sink(pdMS_TO_TICKS(TASK_DELAY_MS), false);
680
681 const uint32_t output_frames_free =
682 this_mixer->audio_stream_info_.value().bytes_to_frames(output_transfer_buffer->free());
683
684 speakers_with_data.clear();
685 transfer_buffers_with_data.clear();
686
687 for (auto &speaker : this_mixer->source_speakers_) {
688 if (speaker->is_running() && !speaker->get_pause_state()) {
689 // Speaker is running and not paused, so it possibly can provide audio data
690 std::shared_ptr<audio::AudioSourceTransferBuffer> transfer_buffer = speaker->get_transfer_buffer().lock();
691 if (transfer_buffer.use_count() == 0) {
692 // No transfer buffer allocated, so skip processing this speaker
693 continue;
694 }
695 speaker->process_data_from_source(transfer_buffer, 0); // Transfers and ducks audio from source ring buffers
696
697 if (transfer_buffer->available() > 0) {
698 // Store the locked transfer buffers in their own vector to avoid releasing ownership until after the loop
699 transfer_buffers_with_data.push_back(transfer_buffer);
700 speakers_with_data.push_back(speaker);
701 }
702 }
703 }
704
705 if (transfer_buffers_with_data.empty()) {
706 // No audio available for transferring, block task temporarily
707 delay(TASK_DELAY_MS);
708 continue;
709 }
710
711 uint32_t frames_to_mix = output_frames_free;
712
713 if ((transfer_buffers_with_data.size() == 1) || this_mixer->queue_mode_) {
714 // Only one speaker has audio data, just copy samples over
715
716 audio::AudioStreamInfo active_stream_info = speakers_with_data[0]->get_audio_stream_info();
717
718 if (active_stream_info.get_sample_rate() ==
720 // Speaker's sample rate matches the output speaker's, copy directly
721
722 const uint32_t frames_available_in_buffer =
723 active_stream_info.bytes_to_frames(transfer_buffers_with_data[0]->available());
724 frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
725 copy_frames(reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start()), active_stream_info,
726 reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
727 this_mixer->audio_stream_info_.value(), frames_to_mix);
728
729 // Set playback delay for newly contributing source
730 if (!speakers_with_data[0]->has_contributed_.load(std::memory_order_acquire)) {
731 speakers_with_data[0]->playback_delay_frames_.store(
732 this_mixer->frames_in_pipeline_.load(std::memory_order_acquire), std::memory_order_release);
733 speakers_with_data[0]->has_contributed_.store(true, std::memory_order_release);
734 }
735
736 // Update source speaker pending frames
737 speakers_with_data[0]->pending_playback_frames_.fetch_add(frames_to_mix, std::memory_order_release);
738 transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.frames_to_bytes(frames_to_mix));
739
740 // Update output transfer buffer length and pipeline frame count
741 output_transfer_buffer->increase_buffer_length(
742 this_mixer->audio_stream_info_.value().frames_to_bytes(frames_to_mix));
743 this_mixer->frames_in_pipeline_.fetch_add(frames_to_mix, std::memory_order_release);
744 } else {
745 // Speaker's stream info doesn't match the output speaker's, so it's a new source speaker
746 if (!this_mixer->output_speaker_->is_stopped()) {
747 if (!sent_finished) {
748 this_mixer->output_speaker_->finish();
749 sent_finished = true; // Avoid repeatedly sending the finish command
750 }
751 } else {
752 // Speaker has finished writing the current audio, update the stream information and restart the speaker
753 this_mixer->audio_stream_info_ =
754 audio::AudioStreamInfo(active_stream_info.get_bits_per_sample(), this_mixer->output_channels_,
755 active_stream_info.get_sample_rate());
756 this_mixer->output_speaker_->set_audio_stream_info(this_mixer->audio_stream_info_.value());
757 this_mixer->output_speaker_->start();
758 // Reset pipeline frame count since we're starting fresh with a new sample rate
759 this_mixer->frames_in_pipeline_.store(0, std::memory_order_release);
760 sent_finished = false;
761 }
762 }
763 } else {
764 // Determine how many frames to mix
765 for (size_t i = 0; i < transfer_buffers_with_data.size(); ++i) {
766 const uint32_t frames_available_in_buffer =
767 speakers_with_data[i]->get_audio_stream_info().bytes_to_frames(transfer_buffers_with_data[i]->available());
768 frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
769 }
770 int16_t *primary_buffer = reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start());
771 audio::AudioStreamInfo primary_stream_info = speakers_with_data[0]->get_audio_stream_info();
772
773 // Mix two streams together
774 for (size_t i = 1; i < transfer_buffers_with_data.size(); ++i) {
775 mix_audio_samples(primary_buffer, primary_stream_info,
776 reinterpret_cast<int16_t *>(transfer_buffers_with_data[i]->get_buffer_start()),
777 speakers_with_data[i]->get_audio_stream_info(),
778 reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
779 this_mixer->audio_stream_info_.value(), frames_to_mix);
780
781 if (i != transfer_buffers_with_data.size() - 1) {
782 // Need to mix more streams together, point primary buffer and stream info to the already mixed output
783 primary_buffer = reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end());
784 primary_stream_info = this_mixer->audio_stream_info_.value();
785 }
786 }
787
788 // Get current pipeline depth for delay calculation (before incrementing)
789 uint32_t current_pipeline_frames = this_mixer->frames_in_pipeline_.load(std::memory_order_acquire);
790
791 // Update source transfer buffer lengths and add new audio durations to the source speaker pending playbacks
792 for (size_t i = 0; i < transfer_buffers_with_data.size(); ++i) {
793 // Set playback delay for newly contributing sources
794 if (!speakers_with_data[i]->has_contributed_.load(std::memory_order_acquire)) {
795 speakers_with_data[i]->playback_delay_frames_.store(current_pipeline_frames, std::memory_order_release);
796 speakers_with_data[i]->has_contributed_.store(true, std::memory_order_release);
797 }
798
799 speakers_with_data[i]->pending_playback_frames_.fetch_add(frames_to_mix, std::memory_order_release);
800 transfer_buffers_with_data[i]->decrease_buffer_length(
801 speakers_with_data[i]->get_audio_stream_info().frames_to_bytes(frames_to_mix));
802 }
803
804 // Update output transfer buffer length and pipeline frame count (once, not per source)
805 output_transfer_buffer->increase_buffer_length(
806 this_mixer->audio_stream_info_.value().frames_to_bytes(frames_to_mix));
807 this_mixer->frames_in_pipeline_.fetch_add(frames_to_mix, std::memory_order_release);
808 }
809 }
810
811 xEventGroupSetBits(this_mixer->event_group_, MIXER_TASK_STATE_STOPPING);
812
813 // Reset pipeline frame count since the task is stopping
814 this_mixer->frames_in_pipeline_.store(0, std::memory_order_release);
815
816 output_transfer_buffer.reset();
817
818 xEventGroupSetBits(this_mixer->event_group_, MIXER_TASK_STATE_STOPPED);
819
820 vTaskSuspend(nullptr); // Suspend this task indefinitely until the loop method deletes it
821}
822
823} // namespace mixer_speaker
824} // namespace esphome
825
826#endif
void wake_loop_threadsafe()
Wake the main event loop from a FreeRTOS task. Thread-safe; can be called from task context to immedia...
void status_momentary_error(const char *name, uint32_t length=5000)
Set error status flag and automatically clear it after a timeout.
void enable_loop_soon_any_context()
Thread and ISR-safe version of enable_loop() that can be called from any context.
bool status_has_error() const
void disable_loop()
Disable this component's loop.
Fixed-capacity vector - allocates once at runtime, never reallocates. This avoids std::vector template...
Definition helpers.h:227
bool empty() const
Definition helpers.h:385
size_t size() const
Definition helpers.h:384
void push_back(const T &value)
Add element without bounds checking. Caller must ensure sufficient capacity was allocated via init(). S...
Definition helpers.h:344
void init(size_t n)
Definition helpers.h:317
An STL allocator that uses SPI or internal RAM.
Definition helpers.h:1647
void deallocate(T *p, size_t n)
Definition helpers.h:1705
T * allocate(size_t n)
Definition helpers.h:1667
static std::unique_ptr< RingBuffer > create(size_t len)
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition audio.h:73
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition audio.h:53
uint8_t get_bits_per_sample() const
Definition audio.h:28
uint32_t ms_to_samples(uint32_t ms) const
Converts duration to samples.
Definition audio.h:68
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition audio.h:43
uint8_t get_channels() const
Definition audio.h:29
uint32_t get_sample_rate() const
Definition audio.h:30
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:48
esp_err_t start_task_()
Starts the mixer task after allocating memory for the task stack.
esp_err_t delete_task_()
If the task is stopped, it sets the task handle to nullptr and deallocates its stack.
esp_err_t start(audio::AudioStreamInfo &stream_info)
Starts the mixer task.
FixedVector< SourceSpeaker * > source_speakers_
speaker::Speaker * get_output_speaker() const
static void mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info, const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info, int16_t *output_buffer, audio::AudioStreamInfo output_stream_info, uint32_t frames_to_mix)
Mixes the primary and secondary streams taking into account the number of channels in each stream.
static void copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info, int16_t *output_buffer, audio::AudioStreamInfo output_stream_info, uint32_t frames_to_transfer)
Copies audio frames from the input buffer to the output buffer taking into account the number of chan...
std::atomic< uint32_t > frames_in_pipeline_
static void audio_mixer_task(void *params)
optional< audio::AudioStreamInfo > audio_stream_info_
std::shared_ptr< audio::AudioSourceTransferBuffer > transfer_buffer_
void set_mute_state(bool mute_state) override
Mute state changes are passed to the parent's output speaker.
static void duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck, int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining, uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step)
Ducks audio samples by a specified amount.
void send_command_(uint32_t command_bit, bool wake_loop=false)
std::atomic< uint32_t > playback_delay_frames_
void apply_ducking(uint8_t decibel_reduction, uint32_t duration)
Sets the ducking level for the source speaker.
std::weak_ptr< RingBuffer > ring_buffer_
size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override
size_t process_data_from_source(std::shared_ptr< audio::AudioSourceTransferBuffer > &transfer_buffer, TickType_t ticks_to_wait)
Transfers audio from the ring buffer into the transfer buffer.
void set_volume(float volume) override
Volume state changes are passed to the parent's output speaker.
std::atomic< uint32_t > pending_playback_frames_
bool has_value() const
Definition optional.h:92
value_type const & value() const
Definition optional.h:94
virtual void set_volume(float volume)
Definition speaker.h:71
void add_audio_output_callback(std::function< void(uint32_t, int64_t)> &&callback)
Callback function for sending the duration of the audio written to the speaker since the last callbac...
Definition speaker.h:109
virtual float get_volume()
Definition speaker.h:79
virtual bool get_pause_state() const
Definition speaker.h:62
CallbackManager< void(uint32_t, int64_t)> audio_output_callback_
Definition speaker.h:123
void set_audio_stream_info(const audio::AudioStreamInfo &audio_stream_info)
Definition speaker.h:99
audio::AudioStreamInfo & get_audio_stream_info()
Definition speaker.h:103
virtual bool get_mute_state()
Definition speaker.h:93
virtual void set_mute_state(bool mute_state)
Definition speaker.h:81
audio::AudioStreamInfo audio_stream_info_
Definition speaker.h:115
virtual void start()=0
virtual void finish()
Definition speaker.h:58
bool is_stopped() const
Definition speaker.h:67
virtual void stop()=0
const Component * component
Definition component.cpp:37
uint8_t duration
Definition msa3xx.h:0
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, size_t samples_to_scale)
Scales Q15 fixed point audio samples.
Definition audio.cpp:57
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
void IRAM_ATTR HOT delay(uint32_t ms)
Definition core.cpp:26
uint32_t IRAM_ATTR HOT millis()
Definition core.cpp:25
Application App
Global storage of Application pointer - only one Application can exist.
uint16_t length
Definition tt21100.cpp:0