ESPHome: esphome/components/audio/audio_decoder.cpp Source File

#include "audio_decoder.h"


#ifdef USE_ESP32


#include "esphome/core/hal.h"


namespace esphome {

namespace audio {


// Upper bound on how long a single decode() call keeps looping before it yields to the caller
static constexpr uint32_t DECODING_TIMEOUT_MS = 50;

// How long one transfer to the sink or from the source may block
static constexpr uint32_t READ_WRITE_TIMEOUT_MS = 20;

// decode() gives up once more than this many potentially failed decode attempts have accumulated
static constexpr uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;


// Creates the input (source) and output (sink) transfer buffers with the requested sizes. The results of the
// create() calls are stored as-is; the other methods check them against nullptr before use.
AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
    : input_transfer_buffer_(AudioSourceTransferBuffer::create(input_buffer_size)),
      output_transfer_buffer_(AudioSinkTransferBuffer::create(output_buffer_size)) {}


// Releases the Helix MP3 decoder when the decoder was set up for an MP3 file. The FLAC and WAV decoders are held
// by unique_ptr members and need no explicit cleanup here.
AudioDecoder::~AudioDecoder() {
#ifdef USE_AUDIO_MP3_SUPPORT
  const bool mp3_decoder_in_use = (this->audio_file_type_ == AudioFileType::MP3);
  if (mp3_decoder_in_use) {
    esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
  }
#endif
}


// Attaches the ring buffer that supplies the raw file data.
// Returns ESP_OK once the source is set, or ESP_ERR_NO_MEM when the input transfer buffer is null.
esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
  if (this->input_transfer_buffer_ == nullptr) {
    return ESP_ERR_NO_MEM;
  }

  this->input_transfer_buffer_->set_source(input_ring_buffer);
  return ESP_OK;
}


// Attaches the ring buffer that receives the decoded audio.
// Returns ESP_OK once the sink is set, or ESP_ERR_NO_MEM when the output transfer buffer is null.
esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
  if (this->output_transfer_buffer_ == nullptr) {
    return ESP_ERR_NO_MEM;
  }

  this->output_transfer_buffer_->set_sink(output_ring_buffer);
  return ESP_OK;
}


#ifdef USE_SPEAKER


// Attaches a speaker as the destination for the decoded audio.
// Returns ESP_OK once the sink is set, or ESP_ERR_NO_MEM when the output transfer buffer is null.
esp_err_t AudioDecoder::add_sink(speaker::Speaker *speaker) {
  if (this->output_transfer_buffer_ == nullptr) {
    return ESP_ERR_NO_MEM;
  }

  this->output_transfer_buffer_->set_sink(speaker);
  return ESP_OK;
}


#endif


// Prepares the decoder for a file of the given type: resets the per-file failure/end-of-file state, creates the
// matching file decoder and sizes the output transfer buffer for it.
//
// Returns:
//  - ESP_OK when the decoder is ready
//  - ESP_ERR_NO_MEM when a transfer buffer is null, the MP3 decoder couldn't be created, or the output transfer
//    buffer couldn't be resized
//  - ESP_ERR_NOT_SUPPORTED for AudioFileType::NONE or a file type without compiled-in support
esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
  if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
    return ESP_ERR_NO_MEM;
  }

#ifdef USE_AUDIO_MP3_SUPPORT
  // A previous start() for an MP3 file left a Helix decoder allocated. Release it before the file type is
  // overwritten; otherwise the handle is either replaced below or never freed by the destructor.
  if (this->audio_file_type_ == AudioFileType::MP3) {
    esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
    this->mp3_decoder_ = nullptr;
  }
#endif

  this->audio_file_type_ = audio_file_type;

  this->potentially_failed_count_ = 0;
  this->end_of_file_ = false;

  switch (this->audio_file_type_) {
#ifdef USE_AUDIO_FLAC_SUPPORT
    case AudioFileType::FLAC:
      this->flac_decoder_ = make_unique<esp_audio_libs::flac::FLACDecoder>();
      this->free_buffer_required_ =
          this->output_transfer_buffer_->capacity();  // Adjusted and reallocated after reading the header
      break;
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
    case AudioFileType::MP3:
      this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
      if (this->mp3_decoder_ == nullptr) {
        // Fail now instead of on the first decode attempt
        return ESP_ERR_NO_MEM;
      }

      // MP3 always has 1152 samples per chunk
      this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2;  // samples * size per sample * channels

      // Always reallocate the output transfer buffer to the smallest necessary size
      if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
        return ESP_ERR_NO_MEM;
      }
      break;
#endif
    case AudioFileType::WAV:
      this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
      this->wav_decoder_->reset();

      // Processing WAVs doesn't actually require a specific amount of buffer size, as it is already in PCM format.
      // Thus, we don't reallocate to a minimum size; the buffer is only grown when it is smaller than 1024 bytes.
      this->free_buffer_required_ = 1024;
      if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
        if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
          // Without enough output space decode() could never make progress
          return ESP_ERR_NO_MEM;
        }
      }
      break;
    case AudioFileType::NONE:
    default:
      return ESP_ERR_NOT_SUPPORTED;
  }

  return ESP_OK;
}


// Runs one time-bounded slice of decoding: repeatedly sends decoded audio to the sink, pulls encoded data from the
// source and calls the decoder matching the file type, for as long as that decoder reports MORE_TO_PROCESS.
//
// stop_gracefully: when true, FINISHED is returned once the output transfer buffer is empty and either the file
// decoder reached the end of file or the input transfer buffer has no buffered data left. It also turns "too many
// potentially failed attempts" into FINISHED instead of FAILED.
//
// Returns FINISHED or FAILED when decoding is over, otherwise DECODING (the function should be called again).
AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {

  if (stop_gracefully) {

    if (this->output_transfer_buffer_->available() == 0) {

      if (this->end_of_file_) {

        // The file decoder indicates it reached the end of file

        return AudioDecoderState::FINISHED;

      }


      if (!this->input_transfer_buffer_->has_buffered_data()) {

        // If all the internal buffers are empty, the decoding is done

        return AudioDecoderState::FINISHED;

      }

    }

  }


  // The counter is only reset after a decode attempt that reports MORE_TO_PROCESS, so this triggers after a run of
  // potentially failed attempts with no successful decode in between
  if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {

    if (stop_gracefully) {

      // No more new data is going to come in, so decoding is done

      return AudioDecoderState::FINISHED;

    }

    return AudioDecoderState::FAILED;

  }


  FileDecoderState state = FileDecoderState::MORE_TO_PROCESS;


  uint32_t decoding_start = millis();


  bool first_loop_iteration = true;


  // Input bytes consumed by the previous loop iteration, and the input bytes available right before decoding
  size_t bytes_processed = 0;

  size_t bytes_available_before_processing = 0;


  while (state == FileDecoderState::MORE_TO_PROCESS) {

    // Transfer decoded out

    if (!this->pause_output_) {

      // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves

      size_t bytes_written =

          this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);


      // Track playback progress; only possible once the stream format is known
      if (this->audio_stream_info_.has_value()) {

        this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);

        this->playback_ms_ +=

            this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);

      }

    } else {

      // If paused, block to avoid wasting CPU resources

      delay(READ_WRITE_TIMEOUT_MS);

    }


    // Verify there is enough space to store more decoded audio and that the function hasn't been running too long

    // (the unsigned subtraction keeps the elapsed time correct even if millis() wraps around)
    if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||

        (millis() - decoding_start > DECODING_TIMEOUT_MS)) {

      return AudioDecoderState::DECODING;

    }


    // Decode more audio


    // Only shift data on the first loop iteration to avoid unnecessary, slow moves

    size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS),

                                                                                first_loop_iteration);


    if (!first_loop_iteration && (this->input_transfer_buffer_->available() < bytes_processed)) {

      // Less data is available than what was processed in last iteration, so don't attempt to decode.

      // This attempts to avoid the decoder from consistently trying to decode an incomplete frame. The transfer buffer

      // will shift the remaining data to the start and copy more from the source the next time the decode function is

      // called

      break;

    }


    bytes_available_before_processing = this->input_transfer_buffer_->available();


    if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {

      // Failed to decode in last attempt and there is no new data


      if ((this->input_transfer_buffer_->free() == 0) && first_loop_iteration) {

        // The input buffer is full. Since it previously failed on the exact same data, we can never recover

        state = FileDecoderState::FAILED;

      } else {

        // Attempt to get more data next time

        state = FileDecoderState::IDLE;

      }

    } else if (this->input_transfer_buffer_->available() == 0) {

      // No data to decode, attempt to get more data next time

      state = FileDecoderState::IDLE;

    } else {

      // Hand the buffered input to the decoder for the configured file type
      switch (this->audio_file_type_) {

#ifdef USE_AUDIO_FLAC_SUPPORT

        case AudioFileType::FLAC:

          state = this->decode_flac_();

          break;

#endif

#ifdef USE_AUDIO_MP3_SUPPORT

        case AudioFileType::MP3:

          state = this->decode_mp3_();

          break;

#endif

        case AudioFileType::WAV:

          state = this->decode_wav_();

          break;

        case AudioFileType::NONE:

        default:

          state = FileDecoderState::IDLE;

          break;

      }

    }


    first_loop_iteration = false;

    // Input bytes consumed in this iteration; compared against the available input at the top of the next one
    bytes_processed = bytes_available_before_processing - this->input_transfer_buffer_->available();


    // FAILED returns immediately; every other state except MORE_TO_PROCESS ends the loop after this bookkeeping
    if (state == FileDecoderState::POTENTIALLY_FAILED) {

      ++this->potentially_failed_count_;

    } else if (state == FileDecoderState::END_OF_FILE) {

      this->end_of_file_ = true;

    } else if (state == FileDecoderState::FAILED) {

      return AudioDecoderState::FAILED;

    } else if (state == FileDecoderState::MORE_TO_PROCESS) {

      this->potentially_failed_count_ = 0;

    }

  }

  return AudioDecoderState::DECODING;

}


#ifdef USE_AUDIO_FLAC_SUPPORT


// Handles FLAC input. Until the stream info is known, each call tries to read the FLAC header; once it is known,
// each call decodes one frame from the input transfer buffer into the free end of the output transfer buffer.
FileDecoderState AudioDecoder::decode_flac_() {
  namespace flac = esp_audio_libs::flac;

  auto *const encoded_data = this->input_transfer_buffer_->get_buffer_start();
  const auto encoded_length = this->input_transfer_buffer_->available();

  if (!this->audio_stream_info_.has_value()) {
    // The header is still pending
    const auto header_result = this->flac_decoder_->read_header(encoded_data, encoded_length);

    if (header_result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
      return FileDecoderState::POTENTIALLY_FAILED;
    }
    if (header_result != flac::FLAC_DECODER_SUCCESS) {
      // The FLAC header couldn't be read
      return FileDecoderState::FAILED;
    }

    const size_t header_bytes = this->flac_decoder_->get_bytes_index();
    this->input_transfer_buffer_->decrease_buffer_length(header_bytes);

    // Shrink or grow the output transfer buffer to exactly what the decoder asks for
    this->free_buffer_required_ = this->flac_decoder_->get_output_buffer_size_bytes();
    if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
      // The output buffer couldn't be reallocated
      return FileDecoderState::FAILED;
    }

    this->audio_stream_info_ =
        audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
                               this->flac_decoder_->get_sample_rate());
    return FileDecoderState::MORE_TO_PROCESS;
  }

  uint32_t samples_decoded = 0;
  auto *const pcm_destination = reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end());
  const auto frame_result =
      this->flac_decoder_->decode_frame(encoded_data, encoded_length, pcm_destination, &samples_decoded);

  if (frame_result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
    // Nothing is consumed here; the frame is retried once more data has arrived
    return FileDecoderState::POTENTIALLY_FAILED;
  }

  const size_t frame_bytes = this->flac_decoder_->get_bytes_index();
  this->input_transfer_buffer_->decrease_buffer_length(frame_bytes);

  if (frame_result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
    // Corrupted frame: its bytes were dropped above so the same data isn't retried; wait for a new sync
    return FileDecoderState::POTENTIALLY_FAILED;
  }

  // Input was decoded successfully, so publish the new output data
  this->output_transfer_buffer_->increase_buffer_length(
      this->audio_stream_info_.value().samples_to_bytes(samples_decoded));

  return (frame_result == flac::FLAC_DECODER_NO_MORE_FRAMES) ? FileDecoderState::END_OF_FILE
                                                             : FileDecoderState::MORE_TO_PROCESS;
}


#endif


#ifdef USE_AUDIO_MP3_SUPPORT


// Handles MP3 input: skips ahead to the next sync word, decodes one frame into the free end of the output transfer
// buffer and records the stream info from the first frame that produced samples.
FileDecoderState AudioDecoder::decode_mp3_() {
  namespace helix = esp_audio_libs::helix_decoder;

  // Search the buffered input for the next sync word
  const int searchable_bytes = static_cast<int>(this->input_transfer_buffer_->available());
  const int32_t sync_offset =
      helix::MP3FindSyncWord(this->input_transfer_buffer_->get_buffer_start(), searchable_bytes);

  if (sync_offset < 0) {
    // No sync word in the buffered data; drop it all, since new data may have the sync word
    this->input_transfer_buffer_->decrease_buffer_length(searchable_bytes);
    return FileDecoderState::POTENTIALLY_FAILED;
  }

  // Skip everything in front of the sync word
  this->input_transfer_buffer_->decrease_buffer_length(sync_offset);

  uint8_t *read_position = this->input_transfer_buffer_->get_buffer_start();
  int bytes_remaining = static_cast<int>(this->input_transfer_buffer_->available());
  auto *const pcm_destination = reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end());

  const int decode_error =
      helix::MP3Decode(this->mp3_decoder_, &read_position, &bytes_remaining, pcm_destination, 0);

  // MP3Decode updated bytes_remaining; the difference to what was available is what it consumed
  const size_t bytes_consumed = this->input_transfer_buffer_->available() - bytes_remaining;
  this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);

  if (decode_error != 0) {
    const bool unrecoverable =
        (decode_error == helix::ERR_MP3_OUT_OF_MEMORY) || (decode_error == helix::ERR_MP3_NULL_POINTER);
    if (unrecoverable) {
      return FileDecoderState::FAILED;
    }
    // Most errors are recoverable by moving on to the next frame, so mark as potentially failed
    return FileDecoderState::POTENTIALLY_FAILED;
  }

  helix::MP3FrameInfo frame_info;
  helix::MP3GetLastFrameInfo(this->mp3_decoder_, &frame_info);

  if (frame_info.outputSamps > 0) {
    const int bytes_per_sample = frame_info.bitsPerSample / 8;
    this->output_transfer_buffer_->increase_buffer_length(frame_info.outputSamps * bytes_per_sample);

    if (!this->audio_stream_info_.has_value()) {
      this->audio_stream_info_ =
          audio::AudioStreamInfo(frame_info.bitsPerSample, frame_info.nChans, frame_info.samprate);
    }
  }

  return FileDecoderState::MORE_TO_PROCESS;
}


#endif


// Handles WAV input. Until the stream info is known, each call tries to parse the header; afterwards the data is
// copied unchanged from the input transfer buffer to the output transfer buffer.
FileDecoderState AudioDecoder::decode_wav_() {
  namespace wav = esp_audio_libs::wav_decoder;

  if (!this->audio_stream_info_.has_value()) {
    // The header is still pending
    const wav::WAVDecoderResult header_result = this->wav_decoder_->decode_header(
        this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available());

    switch (header_result) {
      case wav::WAV_DECODER_SUCCESS_IN_DATA: {
        this->input_transfer_buffer_->decrease_buffer_length(this->wav_decoder_->bytes_processed());

        this->audio_stream_info_ =
            audio::AudioStreamInfo(this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(),
                                   this->wav_decoder_->sample_rate());

        // A chunk length of zero is treated as "end unknown": data is then copied for as long as it arrives
        this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
        this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
        return FileDecoderState::MORE_TO_PROCESS;
      }
      case wav::WAV_DECODER_WARNING_INCOMPLETE_DATA:
        // The buffered data didn't contain the full header yet
        return FileDecoderState::POTENTIALLY_FAILED;
      default:
        return FileDecoderState::FAILED;
    }
  }

  if (this->wav_has_known_end_ && (this->wav_bytes_left_ == 0)) {
    // Every byte of the data chunk has been copied
    return FileDecoderState::END_OF_FILE;
  }

  // Copy as much as the input holds, the output has room for, and (when known) the data chunk still contains
  size_t copy_length = this->input_transfer_buffer_->available();
  copy_length = std::min(copy_length, this->output_transfer_buffer_->free());
  if (this->wav_has_known_end_) {
    copy_length = std::min(copy_length, this->wav_bytes_left_);
  }

  if (copy_length == 0) {
    return FileDecoderState::IDLE;
  }

  std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(),
              copy_length);
  this->input_transfer_buffer_->decrease_buffer_length(copy_length);
  this->output_transfer_buffer_->increase_buffer_length(copy_length);

  if (this->wav_has_known_end_) {
    this->wav_bytes_left_ -= copy_length;
  }

  return FileDecoderState::IDLE;
}


}  // namespace audio

}  // namespace esphome


#endif

audio_decoder.h

esphome::audio::AudioDecoder::audio_stream_info_
optional< AudioStreamInfo > audio_stream_info_
Definition audio_decoder.h:118

esphome::audio::AudioDecoder::accumulated_frames_written_
uint32_t accumulated_frames_written_
Definition audio_decoder.h:129

esphome::audio::AudioDecoder::start
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
Definition audio_decoder.cpp:54

esphome::audio::AudioDecoder::mp3_decoder_
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
Definition audio_decoder.h:110

esphome::audio::AudioDecoder::playback_ms_
uint32_t playback_ms_
Definition audio_decoder.h:130

esphome::audio::AudioDecoder::wav_has_known_end_
bool wav_has_known_end_
Definition audio_decoder.h:125

esphome::audio::AudioDecoder::pause_output_
bool pause_output_
Definition audio_decoder.h:127

esphome::audio::AudioDecoder::potentially_failed_count_
uint32_t potentially_failed_count_
Definition audio_decoder.h:123

esphome::audio::AudioDecoder::free_buffer_required_
size_t free_buffer_required_
Definition audio_decoder.h:120

esphome::audio::AudioDecoder::flac_decoder_
std::unique_ptr< esp_audio_libs::flac::FLACDecoder > flac_decoder_
Definition audio_decoder.h:106

esphome::audio::AudioDecoder::~AudioDecoder
~AudioDecoder()
Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically)
Definition audio_decoder.cpp:20

esphome::audio::AudioDecoder::output_transfer_buffer_
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
Definition audio_decoder.h:115

esphome::audio::AudioDecoder::audio_file_type_
AudioFileType audio_file_type_
Definition audio_decoder.h:117

esphome::audio::AudioDecoder::decode_flac_
FileDecoderState decode_flac_()
Definition audio_decoder.cpp:226

esphome::audio::AudioDecoder::add_source
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
Definition audio_decoder.cpp:28

esphome::audio::AudioDecoder::end_of_file_
bool end_of_file_
Definition audio_decoder.h:124

esphome::audio::AudioDecoder::wav_decoder_
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
Definition audio_decoder.h:103

esphome::audio::AudioDecoder::add_sink
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
Definition audio_decoder.cpp:36

esphome::audio::AudioDecoder::input_transfer_buffer_
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
Definition audio_decoder.h:114

esphome::audio::AudioDecoder::decode_wav_
FileDecoderState decode_wav_()
Definition audio_decoder.cpp:342

esphome::audio::AudioDecoder::decode
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
Definition audio_decoder.cpp:103

esphome::audio::AudioDecoder::decode_mp3_
FileDecoderState decode_mp3_()
Definition audio_decoder.cpp:289

esphome::audio::AudioDecoder::AudioDecoder
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
Definition audio_decoder.cpp:15

esphome::audio::AudioDecoder::wav_bytes_left_
size_t wav_bytes_left_
Definition audio_decoder.h:121

esphome::audio::AudioSinkTransferBuffer::create
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
Definition audio_transfer_buffer.cpp:12

esphome::audio::AudioSourceTransferBuffer::create
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
Definition audio_transfer_buffer.cpp:22

esphome::audio::AudioStreamInfo
Definition audio.h:11

esphome::speaker::Speaker
Definition speaker.h:29

state
bool state
Definition fan.h:0

hal.h

esphome::audio::AudioDecoderState
AudioDecoderState
Definition audio_decoder.h:30

esphome::audio::AudioDecoderState::FINISHED
@ FINISHED

esphome::audio::AudioDecoderState::DECODING
@ DECODING

esphome::audio::AudioDecoderState::FAILED
@ FAILED

esphome::audio::FileDecoderState
FileDecoderState
Definition audio_decoder.h:37

esphome::audio::FileDecoderState::POTENTIALLY_FAILED
@ POTENTIALLY_FAILED

esphome::audio::FileDecoderState::END_OF_FILE
@ END_OF_FILE

esphome::audio::FileDecoderState::MORE_TO_PROCESS
@ MORE_TO_PROCESS

esphome::audio::FileDecoderState::IDLE
@ IDLE

esphome::audio::FileDecoderState::FAILED
@ FAILED

esphome::audio::AudioFileType
AudioFileType
Definition audio.h:108

esphome::audio::AudioFileType::MP3
@ MP3

esphome::audio::AudioFileType::WAV
@ WAV

esphome::audio::AudioFileType::FLAC
@ FLAC

esphome::audio::AudioFileType::NONE
@ NONE

esphome
Providing packet encoding functions for exchanging data with a remote host.
Definition a01nyub.cpp:7

esphome::make_unique
std::unique_ptr< T > make_unique(Args &&...args)
Definition helpers.h:86

esphome::delay
void IRAM_ATTR HOT delay(uint32_t ms)
Definition core.cpp:29

esphome::millis
uint32_t IRAM_ATTR HOT millis()
Definition core.cpp:28