ESPHome 2025.6.3
Loading...
Searching...
No Matches
i2s_audio_microphone.cpp
Go to the documentation of this file.
2
3#ifdef USE_ESP32
4
5#ifdef USE_I2S_LEGACY
6#include <driver/i2s.h>
7#else
8#include <driver/i2s_std.h>
9#include <driver/i2s_pdm.h>
10#endif
11
12#include "esphome/core/hal.h"
13#include "esphome/core/log.h"
14
16
17namespace esphome {
18namespace i2s_audio {
19
20static const UBaseType_t MAX_LISTENERS = 16;
21
22static const uint32_t READ_DURATION_MS = 16;
23
24static const size_t TASK_STACK_SIZE = 4096;
25static const ssize_t TASK_PRIORITY = 23;
26
27// Use an exponential moving average to correct a DC offset with weight factor 1/1000
28static const int32_t DC_OFFSET_MOVING_AVERAGE_COEFFICIENT_DENOMINATOR = 1000;
29
30static const char *const TAG = "i2s_audio.microphone";
31
// Bit flags stored in the microphone's FreeRTOS event group; they carry the stop command from ``loop`` to the
// microphone task and the task's lifecycle state back to ``loop``.
32enum MicrophoneEventGroupBits : uint32_t {
33 COMMAND_STOP = (1 << 0), // stops the microphone task, set and cleared by ``loop``
34
35 TASK_STARTING = (1 << 10), // set by mic task, cleared by ``loop``
36 TASK_RUNNING = (1 << 11), // set by mic task, cleared by ``loop``
37 TASK_STOPPED = (1 << 13), // set by mic task, cleared by ``loop``
38
39 ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
40};
41
43 ESP_LOGCONFIG(TAG, "Running setup");
44#ifdef USE_I2S_LEGACY
45#if SOC_I2S_SUPPORTS_ADC
46 if (this->adc_) {
47 if (this->parent_->get_port() != I2S_NUM_0) {
48 ESP_LOGE(TAG, "Internal ADC only works on I2S0");
49 this->mark_failed();
50 return;
51 }
52 } else
53#endif
54#endif
55 {
56 if (this->pdm_) {
57 if (this->parent_->get_port() != I2S_NUM_0) {
58 ESP_LOGE(TAG, "PDM only works on I2S0");
59 this->mark_failed();
60 return;
61 }
62 }
63 }
64
65 this->active_listeners_semaphore_ = xSemaphoreCreateCounting(MAX_LISTENERS, MAX_LISTENERS);
66 if (this->active_listeners_semaphore_ == nullptr) {
67 ESP_LOGE(TAG, "Creating semaphore failed");
68 this->mark_failed();
69 return;
70 }
71
72 this->event_group_ = xEventGroupCreate();
73 if (this->event_group_ == nullptr) {
74 ESP_LOGE(TAG, "Creating event group failed");
75 this->mark_failed();
76 return;
77 }
78
80}
81
83 ESP_LOGCONFIG(TAG,
84 "Microphone:\n"
85 " Pin: %d\n"
86 " PDM: %s\n"
87 " DC offset correction: %s",
88 static_cast<int8_t>(this->din_pin_), YESNO(this->pdm_), YESNO(this->correct_dc_offset_));
89}
90
92 uint8_t channel_count = 1;
93#ifdef USE_I2S_LEGACY
94 uint8_t bits_per_sample = this->bits_per_sample_;
95
96 if (this->channel_ == I2S_CHANNEL_FMT_RIGHT_LEFT) {
97 channel_count = 2;
98 }
99#else
100 uint8_t bits_per_sample = 16;
101 if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO) {
102 bits_per_sample = this->slot_bit_width_;
103 }
104
105 if (this->slot_mode_ == I2S_SLOT_MODE_STEREO) {
106 channel_count = 2;
107 }
108#endif
109
110#ifdef USE_ESP32_VARIANT_ESP32
111 // ESP32 reads audio aligned to a multiple of 2 bytes. For example, if configured for 24 bits per sample, then it will
112 // produce 32 bits per sample, where the actual data is in the most significant bits. Other ESP32 variants produce 24
113 // bits per sample in this situation.
114 if (bits_per_sample < 16) {
115 bits_per_sample = 16;
116 } else if ((bits_per_sample > 16) && (bits_per_sample <= 32)) {
117 bits_per_sample = 32;
118 }
119#endif
120
121 if (this->pdm_) {
122 bits_per_sample = 16; // PDM mics are always 16 bits per sample
123 }
124
125 this->audio_stream_info_ = audio::AudioStreamInfo(bits_per_sample, channel_count, this->sample_rate_);
126}
127
129 if (this->is_failed())
130 return;
131
132 xSemaphoreTake(this->active_listeners_semaphore_, 0);
133}
134
135bool I2SAudioMicrophone::start_driver_() {
136 if (!this->parent_->try_lock()) {
137 return false; // Waiting for another i2s to return lock
138 }
139 this->locked_driver_ = true;
140 esp_err_t err;
141
142#ifdef USE_I2S_LEGACY
143 i2s_driver_config_t config = {
144 .mode = (i2s_mode_t) (this->i2s_mode_ | I2S_MODE_RX),
145 .sample_rate = this->sample_rate_,
146 .bits_per_sample = this->bits_per_sample_,
147 .channel_format = this->channel_,
148 .communication_format = I2S_COMM_FORMAT_STAND_I2S,
149 .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
150 .dma_buf_count = 4,
151 .dma_buf_len = 240, // Must be divisible by 3 to support 24 bits per sample on old driver and newer variants
152 .use_apll = this->use_apll_,
153 .tx_desc_auto_clear = false,
154 .fixed_mclk = 0,
155 .mclk_multiple = this->mclk_multiple_,
156 .bits_per_chan = this->bits_per_channel_,
157 };
158
159#if SOC_I2S_SUPPORTS_ADC
160 if (this->adc_) {
161 config.mode = (i2s_mode_t) (config.mode | I2S_MODE_ADC_BUILT_IN);
162 err = i2s_driver_install(this->parent_->get_port(), &config, 0, nullptr);
163 if (err != ESP_OK) {
164 ESP_LOGE(TAG, "Error installing driver: %s", esp_err_to_name(err));
165 return false;
166 }
167
168 err = i2s_set_adc_mode(ADC_UNIT_1, this->adc_channel_);
169 if (err != ESP_OK) {
170 ESP_LOGE(TAG, "Error setting ADC mode: %s", esp_err_to_name(err));
171 return false;
172 }
173
174 err = i2s_adc_enable(this->parent_->get_port());
175 if (err != ESP_OK) {
176 ESP_LOGE(TAG, "Error enabling ADC: %s", esp_err_to_name(err));
177 return false;
178 }
179 } else
180#endif
181 {
182 if (this->pdm_)
183 config.mode = (i2s_mode_t) (config.mode | I2S_MODE_PDM);
184
185 err = i2s_driver_install(this->parent_->get_port(), &config, 0, nullptr);
186 if (err != ESP_OK) {
187 ESP_LOGE(TAG, "Error installing driver: %s", esp_err_to_name(err));
188 return false;
189 }
190
191 i2s_pin_config_t pin_config = this->parent_->get_pin_config();
192 pin_config.data_in_num = this->din_pin_;
193
194 err = i2s_set_pin(this->parent_->get_port(), &pin_config);
195 if (err != ESP_OK) {
196 ESP_LOGE(TAG, "Error setting pin: %s", esp_err_to_name(err));
197 return false;
198 }
199 }
200#else
201 i2s_chan_config_t chan_cfg = {
202 .id = this->parent_->get_port(),
203 .role = this->i2s_role_,
204 .dma_desc_num = 4,
205 .dma_frame_num = 256,
206 .auto_clear = false,
207 };
208 /* Allocate a new RX channel and get the handle of this channel */
209 err = i2s_new_channel(&chan_cfg, NULL, &this->rx_handle_);
210 if (err != ESP_OK) {
211 ESP_LOGE(TAG, "Error creating channel: %s", esp_err_to_name(err));
212 return false;
213 }
214
215 i2s_clock_src_t clk_src = I2S_CLK_SRC_DEFAULT;
216#ifdef I2S_CLK_SRC_APLL
217 if (this->use_apll_) {
218 clk_src = I2S_CLK_SRC_APLL;
219 }
220#endif
221 i2s_std_gpio_config_t pin_config = this->parent_->get_pin_config();
222#if SOC_I2S_SUPPORTS_PDM_RX
223 if (this->pdm_) {
224 i2s_pdm_rx_clk_config_t clk_cfg = {
225 .sample_rate_hz = this->sample_rate_,
226 .clk_src = clk_src,
227 .mclk_multiple = this->mclk_multiple_,
228 .dn_sample_mode = I2S_PDM_DSR_8S,
229 };
230
231 i2s_pdm_rx_slot_config_t slot_cfg = I2S_PDM_RX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, this->slot_mode_);
232 switch (this->std_slot_mask_) {
233 case I2S_STD_SLOT_LEFT:
234 slot_cfg.slot_mask = I2S_PDM_SLOT_LEFT;
235 break;
236 case I2S_STD_SLOT_RIGHT:
237 slot_cfg.slot_mask = I2S_PDM_SLOT_RIGHT;
238 break;
239 case I2S_STD_SLOT_BOTH:
240 slot_cfg.slot_mask = I2S_PDM_SLOT_BOTH;
241 break;
242 }
243
244 /* Init the channel into PDM RX mode */
245 i2s_pdm_rx_config_t pdm_rx_cfg = {
246 .clk_cfg = clk_cfg,
247 .slot_cfg = slot_cfg,
248 .gpio_cfg =
249 {
250 .clk = pin_config.ws,
251 .din = this->din_pin_,
252 .invert_flags =
253 {
254 .clk_inv = pin_config.invert_flags.ws_inv,
255 },
256 },
257 };
258 err = i2s_channel_init_pdm_rx_mode(this->rx_handle_, &pdm_rx_cfg);
259 } else
260#endif
261 {
262 i2s_std_clk_config_t clk_cfg = {
263 .sample_rate_hz = this->sample_rate_,
264 .clk_src = clk_src,
265 .mclk_multiple = this->mclk_multiple_,
266 };
267 i2s_std_slot_config_t std_slot_cfg =
268 I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG((i2s_data_bit_width_t) this->slot_bit_width_, this->slot_mode_);
269 std_slot_cfg.slot_bit_width = this->slot_bit_width_;
270 std_slot_cfg.slot_mask = this->std_slot_mask_;
271
272 pin_config.din = this->din_pin_;
273
274 i2s_std_config_t std_cfg = {
275 .clk_cfg = clk_cfg,
276 .slot_cfg = std_slot_cfg,
277 .gpio_cfg = pin_config,
278 };
279 /* Initialize the channel */
280 err = i2s_channel_init_std_mode(this->rx_handle_, &std_cfg);
281 }
282 if (err != ESP_OK) {
283 ESP_LOGE(TAG, "Error initializing channel: %s", esp_err_to_name(err));
284 return false;
285 }
286
287 /* Before reading data, start the RX channel first */
288 i2s_channel_enable(this->rx_handle_);
289 if (err != ESP_OK) {
290 ESP_LOGE(TAG, "Enabling failed: %s", esp_err_to_name(err));
291 return false;
292 }
293#endif
294
295 this->configure_stream_settings_(); // redetermine the settings in case some settings were changed after compilation
296
297 return true;
298}
299
301 if (this->state_ == microphone::STATE_STOPPED || this->is_failed())
302 return;
303
304 xSemaphoreGive(this->active_listeners_semaphore_);
305}
306
307void I2SAudioMicrophone::stop_driver_() {
308 // There is no harm continuing to unload the driver if an error is ever returned by the various functions. This
309 // ensures that we stop/unload the driver when it only partially starts.
310
311 esp_err_t err;
312#ifdef USE_I2S_LEGACY
313#if SOC_I2S_SUPPORTS_ADC
314 if (this->adc_) {
315 err = i2s_adc_disable(this->parent_->get_port());
316 if (err != ESP_OK) {
317 ESP_LOGW(TAG, "Error disabling ADC: %s", esp_err_to_name(err));
318 }
319 }
320#endif
321 err = i2s_stop(this->parent_->get_port());
322 if (err != ESP_OK) {
323 ESP_LOGW(TAG, "Error stopping: %s", esp_err_to_name(err));
324 }
325 err = i2s_driver_uninstall(this->parent_->get_port());
326 if (err != ESP_OK) {
327 ESP_LOGW(TAG, "Error uninstalling driver: %s", esp_err_to_name(err));
328 }
329#else
330 if (this->rx_handle_ != nullptr) {
331 /* Have to stop the channel before deleting it */
332 err = i2s_channel_disable(this->rx_handle_);
333 if (err != ESP_OK) {
334 ESP_LOGW(TAG, "Error stopping: %s", esp_err_to_name(err));
335 }
336 /* If the handle is not needed any more, delete it to release the channel resources */
337 err = i2s_del_channel(this->rx_handle_);
338 if (err != ESP_OK) {
339 ESP_LOGW(TAG, "Error deleting channel: %s", esp_err_to_name(err));
340 }
341 this->rx_handle_ = nullptr;
342 }
343#endif
344 if (this->locked_driver_) {
345 this->parent_->unlock();
346 this->locked_driver_ = false;
347 }
348}
349
351 I2SAudioMicrophone *this_microphone = (I2SAudioMicrophone *) params;
352 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_STARTING);
353
354 { // Ensures the samples vector is freed when the task stops
355
356 const size_t bytes_to_read = this_microphone->audio_stream_info_.ms_to_bytes(READ_DURATION_MS);
357 std::vector<uint8_t> samples;
358 samples.reserve(bytes_to_read);
359
360 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_RUNNING);
361
362 while (!(xEventGroupGetBits(this_microphone->event_group_) & MicrophoneEventGroupBits::COMMAND_STOP)) {
363 if (this_microphone->data_callbacks_.size() > 0) {
364 samples.resize(bytes_to_read);
365 size_t bytes_read = this_microphone->read_(samples.data(), bytes_to_read, 2 * pdMS_TO_TICKS(READ_DURATION_MS));
366 samples.resize(bytes_read);
367 if (this_microphone->correct_dc_offset_) {
368 this_microphone->fix_dc_offset_(samples);
369 }
370 this_microphone->data_callbacks_.call(samples);
371 } else {
372 vTaskDelay(pdMS_TO_TICKS(READ_DURATION_MS));
373 }
374 }
375 }
376
377 xEventGroupSetBits(this_microphone->event_group_, MicrophoneEventGroupBits::TASK_STOPPED);
378 while (true) {
379 // Continuously delay until the loop method deletes the task
380 vTaskDelay(pdMS_TO_TICKS(10));
381 }
382}
383
384void I2SAudioMicrophone::fix_dc_offset_(std::vector<uint8_t> &data) {
385 const size_t bytes_per_sample = this->audio_stream_info_.samples_to_bytes(1);
386 const uint32_t total_samples = this->audio_stream_info_.bytes_to_samples(data.size());
387
388 if (total_samples == 0) {
389 return;
390 }
391
392 int64_t offset_accumulator = 0;
393 for (uint32_t sample_index = 0; sample_index < total_samples; ++sample_index) {
394 const uint32_t byte_index = sample_index * bytes_per_sample;
395 int32_t sample = audio::unpack_audio_sample_to_q31(&data[byte_index], bytes_per_sample);
396 offset_accumulator += sample;
397 sample -= this->dc_offset_;
398 audio::pack_q31_as_audio_sample(sample, &data[byte_index], bytes_per_sample);
399 }
400
401 const int32_t new_offset = offset_accumulator / total_samples;
402 this->dc_offset_ = new_offset / DC_OFFSET_MOVING_AVERAGE_COEFFICIENT_DENOMINATOR +
403 (DC_OFFSET_MOVING_AVERAGE_COEFFICIENT_DENOMINATOR - 1) * this->dc_offset_ /
404 DC_OFFSET_MOVING_AVERAGE_COEFFICIENT_DENOMINATOR;
405}
406
407size_t I2SAudioMicrophone::read_(uint8_t *buf, size_t len, TickType_t ticks_to_wait) {
408 size_t bytes_read = 0;
409#ifdef USE_I2S_LEGACY
410 esp_err_t err = i2s_read(this->parent_->get_port(), buf, len, &bytes_read, ticks_to_wait);
411#else
412 // i2s_channel_read expects the timeout value in ms, not ticks
413 esp_err_t err = i2s_channel_read(this->rx_handle_, buf, len, &bytes_read, pdTICKS_TO_MS(ticks_to_wait));
414#endif
415 if ((err != ESP_OK) && ((err != ESP_ERR_TIMEOUT) || (ticks_to_wait != 0))) {
416 // Ignore ESP_ERR_TIMEOUT if ticks_to_wait = 0, as it will read the data on the next call
417 if (!this->status_has_warning()) {
418 // Avoid spamming the logs with the error message if its repeated
419 ESP_LOGW(TAG, "Read error: %s", esp_err_to_name(err));
420 }
421 this->status_set_warning();
422 return 0;
423 }
424 if ((bytes_read == 0) && (ticks_to_wait > 0)) {
425 this->status_set_warning();
426 return 0;
427 }
428 this->status_clear_warning();
429#if defined(USE_ESP32_VARIANT_ESP32) and not defined(USE_I2S_LEGACY)
430 // For ESP32 8/16 bit standard mono mode samples need to be switched.
431 if (this->slot_mode_ == I2S_SLOT_MODE_MONO && this->slot_bit_width_ <= 16 && !this->pdm_) {
432 size_t samples_read = bytes_read / sizeof(int16_t);
433 for (int i = 0; i < samples_read; i += 2) {
434 int16_t tmp = buf[i];
435 buf[i] = buf[i + 1];
436 buf[i + 1] = tmp;
437 }
438 }
439#endif
440 return bytes_read;
441}
442
444 uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
445
446 if (event_group_bits & MicrophoneEventGroupBits::TASK_STARTING) {
447 ESP_LOGV(TAG, "Task started, attempting to allocate buffer");
448 xEventGroupClearBits(this->event_group_, MicrophoneEventGroupBits::TASK_STARTING);
449 }
450
451 if (event_group_bits & MicrophoneEventGroupBits::TASK_RUNNING) {
452 ESP_LOGV(TAG, "Task is running and reading data");
453
454 xEventGroupClearBits(this->event_group_, MicrophoneEventGroupBits::TASK_RUNNING);
456 }
457
458 if ((event_group_bits & MicrophoneEventGroupBits::TASK_STOPPED)) {
459 ESP_LOGV(TAG, "Task finished, freeing resources and uninstalling driver");
460
461 vTaskDelete(this->task_handle_);
462 this->task_handle_ = nullptr;
463 this->stop_driver_();
464 xEventGroupClearBits(this->event_group_, ALL_BITS);
465 this->status_clear_error();
466
468 }
469
470 // Start the microphone if any semaphores are taken
471 if ((uxSemaphoreGetCount(this->active_listeners_semaphore_) < MAX_LISTENERS) &&
474 }
475
476 // Stop the microphone if all semaphores are returned
477 if ((uxSemaphoreGetCount(this->active_listeners_semaphore_) == MAX_LISTENERS) &&
480 }
481
482 switch (this->state_) {
484 if (this->status_has_error()) {
485 break;
486 }
487
488 if (!this->start_driver_()) {
489 ESP_LOGE(TAG, "Driver failed to start; retrying in 1 second");
490 this->status_momentary_error("driver_fail", 1000);
491 this->stop_driver_(); // Stop/frees whatever possibly started
492 break;
493 }
494
495 if (this->task_handle_ == nullptr) {
496 xTaskCreate(I2SAudioMicrophone::mic_task, "mic_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY,
497 &this->task_handle_);
498
499 if (this->task_handle_ == nullptr) {
500 ESP_LOGE(TAG, "Task failed to start, retrying in 1 second");
501 this->status_momentary_error("task_fail", 1000);
502 this->stop_driver_(); // Stops the driver to return the lock; will be reloaded in next attempt
503 }
504 }
505
506 break;
508 break;
510 xEventGroupSetBits(this->event_group_, MicrophoneEventGroupBits::COMMAND_STOP);
511 break;
513 break;
514 }
515}
516
517} // namespace i2s_audio
518} // namespace esphome
519
520#endif // USE_ESP32
virtual void mark_failed()
Mark this component as failed.
bool is_failed() const
void status_momentary_error(const std::string &name, uint32_t length=5000)
bool status_has_warning() const
bool status_has_error() const
void status_set_warning(const char *message="unspecified")
void status_clear_warning()
size_t ms_to_bytes(uint32_t ms) const
Converts duration to bytes.
Definition audio.h:73
size_t samples_to_bytes(uint32_t samples) const
Converts samples to bytes.
Definition audio.h:58
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition audio.h:48
i2s_std_slot_mask_t std_slot_mask_
Definition i2s_audio.h:45
i2s_slot_bit_width_t slot_bit_width_
Definition i2s_audio.h:46
i2s_bits_per_chan_t bits_per_channel_
Definition i2s_audio.h:41
i2s_mclk_multiple_t mclk_multiple_
Definition i2s_audio.h:50
i2s_bits_per_sample_t bits_per_sample_
Definition i2s_audio.h:40
void configure_stream_settings_()
Starts the I2S driver.
audio::AudioStreamInfo audio_stream_info_
Definition microphone.h:41
CallbackManager< void(const std::vector< uint8_t > &)> data_callbacks_
Definition microphone.h:43
__int64 ssize_t
Definition httplib.h:175
int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_sample)
Unpacks a quantized audio sample into a Q31 fixed-point number.
Definition audio.h:142
void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample)
Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
Definition audio.h:168
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7
std::string size_t len
Definition helpers.h:302