ESP-ADF microphone and speaker board support for s3-box

This commit is contained in:
Jesse Hills 2023-08-11 07:53:34 +12:00
parent db9dc11022
commit 3635179564
No known key found for this signature in database
GPG Key ID: BEAAE804EFD8E83A
10 changed files with 753 additions and 0 deletions

View File

@ -0,0 +1,60 @@
import esphome.config_validation as cv
import esphome.codegen as cg
from esphome.components import esp32
from esphome.const import CONF_ID
# CONFLICTS_WITH = ["i2s_audio"]
DEPENDENCIES = ["esp32"]
# Config key under which the microphone/speaker platforms reference the ESPADF hub.
CONF_ESP_ADF_ID = "esp_adf_id"
# Handles for the generated C++ namespace and classes (esphome::esp_adf::*).
esp_adf_ns = cg.esphome_ns.namespace("esp_adf")
ESPADF = esp_adf_ns.class_("ESPADF", cg.Component)
ESPADFPipeline = esp_adf_ns.class_("ESPADFPipeline", cg.Parented.template(ESPADF))
# Supported board name -> sdkconfig option that to_code() switches on for that board.
SUPPORTED_BOARDS = {"esp32s3box": "CONFIG_ESP32_S3_BOX_BOARD"}
def _validate_board(config):
    """Accept the config only when the selected ESP32 board is in SUPPORTED_BOARDS.

    Returns the config unchanged for a supported board and raises cv.Invalid
    naming the board otherwise.
    """
    selected = esp32.get_board()
    if selected in SUPPORTED_BOARDS:
        return config
    raise cv.Invalid(f"Board {selected} is not supported by esp-adf")
# Validation chain: declare the ESPADF id, check the board against SUPPORTED_BOARDS,
# then apply cv.only_with_esp_idf.
CONFIG_SCHEMA = cv.All(
cv.Schema({cv.GenerateID(): cv.declare_id(ESPADF)}),
_validate_board,
cv.only_with_esp_idf,
)
async def to_code(config):
    """Emit the ESPADF hub and the build settings it needs.

    Registers the component, defines USE_ESP_ADF, adds the esp-adf and esp-dsp
    IDF components, embeds the dueros profile text file and enables the
    sdkconfig option that SUPPORTED_BOARDS maps the current board to.
    """
    hub = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(hub, config)

    cg.add_define("USE_ESP_ADF")

    cg.add_platformio_option("build_unflags", "-Wl,--end-group")

    adf_component = {
        "name": "esp-adf",
        "repo": "https://github.com/espressif/esp-adf",
        "path": "components",
        "ref": "v2.5",
        "components": ["*"],
        "submodules": ["components/esp-sr", "components/esp-adf-libs"],
    }
    esp32.add_idf_component(**adf_component)

    dsp_component = {
        "name": "esp-dsp",
        "repo": "https://github.com/espressif/esp-dsp",
        "ref": "v1.2.0",
    }
    esp32.add_idf_component(**dsp_component)

    cg.add_platformio_option(
        "board_build.embed_txtfiles", "components/dueros_service/duer_profile"
    )

    # Looked up last, exactly where the original performed the lookup.
    board_option = SUPPORTED_BOARDS[esp32.get_board()]
    esp32.add_idf_sdkconfig_option(board_option, True)

View File

@ -0,0 +1,26 @@
#include "esp_adf.h"
#ifdef USE_ESP_IDF
#include <board.h>
#include "esphome/core/log.h"
namespace esphome {
namespace esp_adf {
// Log tag used by the ESP_LOG* calls in this file.
static const char *const TAG = "esp_adf";
void ESPADF::setup() {
ESP_LOGI(TAG, "Start codec chip");
audio_board_handle_t board_handle = audio_board_init();
audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);
}
/// Setup priority reported for this component: setup_priority::HARDWARE.
float ESPADF::get_setup_priority() const {
  return setup_priority::HARDWARE;
}
} // namespace esp_adf
} // namespace esphome
#endif

View File

@ -0,0 +1,31 @@
#pragma once
#ifdef USE_ESP_IDF
#include "esphome/core/component.h"
#include "esphome/core/helpers.h"
namespace esphome {
namespace esp_adf {
// Forward declaration so ESPADFPipeline can name its parent type before ESPADF is defined.
class ESPADF;
/// Common base of the microphone and speaker: Parented<ESPADF> supplies the parent_
/// pointer those classes use to take and release the ESPADF lock.
class ESPADFPipeline : public Parented<ESPADF> {};
/// Hub component for the ESP-ADF board. It owns the mutex that the pipeline
/// components (microphone, speaker) take before building their audio pipeline.
class ESPADF : public Component {
 public:
  void setup() override;
  float get_setup_priority() const override;

  /// Take the lock; forwards to Mutex::lock().
  void lock() {
    this->lock_.lock();
  }

  /// Try to take the lock; forwards the result of Mutex::try_lock().
  bool try_lock() {
    return this->lock_.try_lock();
  }

  /// Release the lock; forwards to Mutex::unlock().
  void unlock() {
    this->lock_.unlock();
  }

 protected:
  Mutex lock_;
};
} // namespace esp_adf
} // namespace esphome
#endif

View File

@ -0,0 +1,33 @@
import esphome.codegen as cg
import esphome.config_validation as cv
from esphome.components import microphone
from esphome.const import CONF_ID
from .. import CONF_ESP_ADF_ID, ESPADF, ESPADFPipeline, esp_adf_ns
# ESPHome loader metadata for the esp_adf microphone platform.
AUTO_LOAD = ["esp_adf"]
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["esp32"]
# Handle for the generated C++ class esp_adf::ESPADFMicrophone and its base classes.
ESPADFMicrophone = esp_adf_ns.class_(
"ESPADFMicrophone", ESPADFPipeline, microphone.Microphone, cg.Component
)
# Microphone schema extended with this platform's id and a reference to the ESPADF hub
# (CONF_ESP_ADF_ID), followed by cv.only_with_esp_idf.
CONFIG_SCHEMA = cv.All(
microphone.MICROPHONE_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(ESPADFMicrophone),
cv.GenerateID(CONF_ESP_ADF_ID): cv.use_id(ESPADF),
}
).extend(cv.COMPONENT_SCHEMA),
cv.only_with_esp_idf,
)
async def to_code(config):
    """Create the ESPADFMicrophone, attach it to its ESPADF parent and register it."""
    mic = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(mic, config)
    await cg.register_parented(mic, config[CONF_ESP_ADF_ID])
    await microphone.register_microphone(mic, config)

View File

@ -0,0 +1,192 @@
#include "esp_adf_microphone.h"
#ifdef USE_ESP32
#include <driver/i2s.h>
#include "esphome/core/hal.h"
#include "esphome/core/log.h"
#include <audio_hal.h>
#include <filter_resample.h>
#include <i2s_stream.h>
#include <raw_stream.h>
namespace esphome {
namespace esp_adf {
// Sizing constant for the buffer read_() fills on each pass through loop().
static const size_t BUFFER_SIZE = 1024;
// Log tag used by the ESP_LOG* calls in this file.
static const char *const TAG = "esp_adf.microphone";
/// Request that the microphone starts; the pipeline itself is built by loop() via start_().
///
/// Does nothing when the component has failed, is already running, or is in the
/// middle of stopping.
void ESPADFMicrophone::start() {
  if (this->is_failed())
    return;
  if (this->state_ == microphone::STATE_RUNNING) {
    // Already running: going back to STARTING would make start_() wait forever
    // on the parent lock that this component is itself holding.
    return;
  }
  if (this->state_ == microphone::STATE_STOPPING) {
    ESP_LOGW(TAG, "Microphone is stopping, cannot start.");
    return;
  }
  this->state_ = microphone::STATE_STARTING;
}
void ESPADFMicrophone::start_() {
if (!this->parent_->try_lock()) {
return;
}
i2s_driver_config_t i2s_config = {
.mode = (i2s_mode_t) (I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = 44100,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
.communication_format = I2S_COMM_FORMAT_STAND_I2S,
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL2 | ESP_INTR_FLAG_IRAM,
.dma_buf_count = 3,
.dma_buf_len = 300,
.use_apll = false,
.tx_desc_auto_clear = true,
.fixed_mclk = 0,
.mclk_multiple = I2S_MCLK_MULTIPLE_DEFAULT,
.bits_per_chan = I2S_BITS_PER_CHAN_DEFAULT,
};
ESP_LOGI(TAG, "Init pipeline");
audio_pipeline_cfg_t pipeline_cfg = {
.rb_size = 8 * 1024,
};
this->pipeline_ = audio_pipeline_init(&pipeline_cfg);
ESP_LOGI(TAG, "Init i2s stream");
i2s_stream_cfg_t i2s_cfg = {
.type = AUDIO_STREAM_READER,
.i2s_config = i2s_config,
.i2s_port = I2S_NUM_0,
.use_alc = false,
.volume = 0,
.out_rb_size = I2S_STREAM_RINGBUFFER_SIZE,
.task_stack = I2S_STREAM_TASK_STACK,
.task_core = I2S_STREAM_TASK_CORE,
.task_prio = I2S_STREAM_TASK_PRIO,
.stack_in_ext = false,
.multi_out_num = 0,
.uninstall_drv = true,
.need_expand = false,
.expand_src_bits = I2S_BITS_PER_SAMPLE_16BIT,
};
this->i2s_stream_reader_ = i2s_stream_init(&i2s_cfg);
ESP_LOGI(TAG, "Init filter");
rsp_filter_cfg_t rsp_cfg = {
.src_rate = 44100,
.src_ch = 2,
.dest_rate = 16000,
.dest_bits = 16,
.dest_ch = 1,
.src_bits = 16,
.mode = RESAMPLE_DECODE_MODE,
.max_indata_bytes = RSP_FILTER_BUFFER_BYTE,
.out_len_bytes = RSP_FILTER_BUFFER_BYTE,
.type = ESP_RESAMPLE_TYPE_AUTO,
.complexity = 2,
.down_ch_idx = 0,
.prefer_flag = ESP_RSP_PREFER_TYPE_SPEED,
.out_rb_size = RSP_FILTER_RINGBUFFER_SIZE,
.task_stack = RSP_FILTER_TASK_STACK,
.task_core = RSP_FILTER_TASK_CORE,
.task_prio = RSP_FILTER_TASK_PRIO,
.stack_in_ext = true,
};
this->filter_ = rsp_filter_init(&rsp_cfg);
ESP_LOGI(TAG, "Init raw stream");
raw_stream_cfg_t raw_cfg = {
.type = AUDIO_STREAM_READER,
.out_rb_size = 8 * 1024,
};
this->raw_read_ = raw_stream_init(&raw_cfg);
ESP_LOGI(TAG, "Register all elements to audio pipeline");
audio_pipeline_register(this->pipeline_, this->i2s_stream_reader_, "i2s");
audio_pipeline_register(this->pipeline_, this->filter_, "filter");
audio_pipeline_register(this->pipeline_, this->raw_read_, "raw");
const char *link_tag[3] = {"i2s", "filter", "raw"};
audio_pipeline_link(this->pipeline_, &link_tag[0], 3);
ESP_LOGI(TAG, "Starting pipeline");
audio_pipeline_run(this->pipeline_);
this->state_ = microphone::STATE_RUNNING;
}
/// Request that the microphone stops.
///
/// A start that loop() has not acted on yet is cancelled straight to STOPPED;
/// otherwise the state becomes STOPPING and loop() tears the pipeline down.
/// Nothing happens when the microphone is already stopped or the component failed.
void ESPADFMicrophone::stop() {
  const bool nothing_to_stop = this->state_ == microphone::STATE_STOPPED || this->is_failed();
  if (nothing_to_stop)
    return;

  const bool start_pending = this->state_ == microphone::STATE_STARTING;
  this->state_ = start_pending ? microphone::STATE_STOPPED : microphone::STATE_STOPPING;
}
void ESPADFMicrophone::stop_() {
ESP_LOGD(TAG, "Stopping microphone");
audio_pipeline_stop(this->pipeline_);
audio_pipeline_wait_for_stop(this->pipeline_);
audio_pipeline_terminate(this->pipeline_);
audio_pipeline_unregister(this->pipeline_, this->i2s_stream_reader_);
audio_pipeline_unregister(this->pipeline_, this->filter_);
audio_pipeline_unregister(this->pipeline_, this->raw_read_);
audio_pipeline_deinit(this->pipeline_);
audio_element_deinit(this->i2s_stream_reader_);
audio_element_deinit(this->filter_);
audio_element_deinit(this->raw_read_);
this->parent_->unlock();
this->state_ = microphone::STATE_STOPPED;
ESP_LOGD(TAG, "Microphone stopped");
}
/// Read up to `len` bytes of audio from the raw stream into `buf`.
///
/// @param buf destination buffer.
/// @param len size passed to raw_stream_read() as the buffer length.
/// @return number of bytes read, or 0 when the read failed.
///
/// Any negative result from raw_stream_read() is treated as a failure. The
/// return type is unsigned, so a negative code must never be passed through:
/// it would reach the caller as an enormous byte count.
size_t ESPADFMicrophone::read(int16_t *buf, size_t len) {
  const int bytes_read = raw_stream_read(this->raw_read_, (char *) buf, len);
  if (bytes_read < 0) {
    ESP_LOGW(TAG, "Error reading from I2S microphone");
    this->status_set_warning();
    return 0;
  }
  this->status_clear_warning();
  return static_cast<size_t>(bytes_read);
}
/// Read one buffer of audio and pass the samples that were actually read to the
/// registered data callbacks.
///
/// The request stays at BUFFER_SIZE bytes, as before. The vector is sized to
/// hold exactly that many bytes and is trimmed to the amount read, so
/// callbacks no longer receive trailing zero padding. Callbacks are skipped
/// when nothing was read.
void ESPADFMicrophone::read_() {
  std::vector<int16_t> samples(BUFFER_SIZE / sizeof(int16_t));
  const size_t capacity_bytes = samples.size() * sizeof(int16_t);

  const size_t bytes_read = this->read(samples.data(), capacity_bytes);
  // A count larger than the buffer can only be an error code that went through
  // an unsigned conversion; treat it like an empty read.
  if (bytes_read == 0 || bytes_read > capacity_bytes)
    return;

  samples.resize(bytes_read / sizeof(int16_t));
  this->data_callbacks_.call(samples);
}
/// Drive the microphone state machine once per main-loop iteration:
/// STARTING builds the pipeline, RUNNING reads audio when at least one data
/// callback is registered, STOPPING tears the pipeline down, STOPPED is idle.
void ESPADFMicrophone::loop() {
  const auto current = this->state_;

  if (current == microphone::STATE_STARTING) {
    this->start_();
  } else if (current == microphone::STATE_RUNNING) {
    const bool has_listeners = this->data_callbacks_.size() > 0;
    if (has_listeners)
      this->read_();
  } else if (current == microphone::STATE_STOPPING) {
    this->stop_();
  }
  // microphone::STATE_STOPPED: nothing to do.
}
} // namespace esp_adf
} // namespace esphome
#endif // USE_ESP32

View File

@ -0,0 +1,37 @@
#pragma once
#ifdef USE_ESP32
#include "../esp_adf.h"
#include "esphome/components/microphone/microphone.h"
#include "esphome/core/component.h"
#include <audio_element.h>
#include <audio_pipeline.h>
namespace esphome {
namespace esp_adf {
/// Microphone that captures audio through an ESP-ADF pipeline.
///
/// start()/stop() only change the requested state; loop() performs the actual
/// pipeline setup, reading and teardown.
class ESPADFMicrophone : public ESPADFPipeline, public microphone::Microphone, public Component {
 public:
  void start() override;
  void stop() override;

  void loop() override;

  size_t read(int16_t *buf, size_t len) override;

 protected:
  void start_();
  void stop_();
  void read_();

  // Pipeline objects created by start_(); null-initialised so they never hold
  // an indeterminate value before the first start.
  audio_pipeline_handle_t pipeline_{nullptr};
  audio_element_handle_t i2s_stream_reader_{nullptr};
  audio_element_handle_t filter_{nullptr};
  audio_element_handle_t raw_read_{nullptr};
};
} // namespace esp_adf
} // namespace esphome
#endif // USE_ESP32

View File

@ -0,0 +1,33 @@
import esphome.codegen as cg
import esphome.config_validation as cv
from esphome.components import speaker
from esphome.const import CONF_ID
from .. import CONF_ESP_ADF_ID, ESPADF, ESPADFPipeline, esp_adf_ns
# ESPHome loader metadata for the esp_adf speaker platform.
AUTO_LOAD = ["esp_adf"]
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["esp32"]
# Handle for the generated C++ class esp_adf::ESPADFSpeaker and its base classes.
ESPADFSpeaker = esp_adf_ns.class_(
"ESPADFSpeaker", ESPADFPipeline, speaker.Speaker, cg.Component
)
# Schema with this platform's id and a reference to the ESPADF hub (CONF_ESP_ADF_ID),
# followed by cv.only_with_esp_idf.
CONFIG_SCHEMA = cv.All(
cv.Schema(
{
cv.GenerateID(): cv.declare_id(ESPADFSpeaker),
cv.GenerateID(CONF_ESP_ADF_ID): cv.use_id(ESPADF),
}
).extend(cv.COMPONENT_SCHEMA),
cv.only_with_esp_idf,
)
async def to_code(config):
    """Create the ESPADFSpeaker, attach it to its ESPADF parent and register it."""
    spk = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(spk, config)
    await cg.register_parented(spk, config[CONF_ESP_ADF_ID])
    await speaker.register_speaker(spk, config)

View File

@ -0,0 +1,272 @@
#include "esp_adf_speaker.h"
#ifdef USE_ESP32
#include <driver/i2s.h>
#include "esphome/core/application.h"
#include "esphome/core/hal.h"
#include "esphome/core/log.h"
#include <audio_hal.h>
#include <filter_resample.h>
#include <i2s_stream.h>
#include <raw_stream.h>
namespace esphome {
namespace esp_adf {
// Capacity, in DataEvent entries, of the queue created in setup() for audio chunks.
static const size_t BUFFER_COUNT = 50;
// Log tag used by the ESP_LOG* calls in this file.
static const char *const TAG = "esp_adf.speaker";
void ESPADFSpeaker::setup() {
ESP_LOGCONFIG(TAG, "Setting up ESP ADF Speaker...");
this->buffer_queue_ = xQueueCreate(BUFFER_COUNT, sizeof(DataEvent));
this->event_queue_ = xQueueCreate(20, sizeof(TaskEvent));
}
/// Request that the speaker starts; loop() then launches the player task via start_().
///
/// Does nothing when the speaker is already running: going back to STARTING
/// would make start_() wait forever on the parent lock this component holds,
/// while loop() stops polling the player task's events.
void ESPADFSpeaker::start() {
  if (this->state_ == speaker::STATE_RUNNING)
    return;  // Already running
  this->state_ = speaker::STATE_STARTING;
}
void ESPADFSpeaker::start_() {
if (!this->parent_->try_lock()) {
return; // Waiting for another i2s component to return lock
}
this->state_ = speaker::STATE_RUNNING;
xTaskCreate(ESPADFSpeaker::player_task, "speaker_task", 8192, (void *) this, 0, &this->player_task_handle_);
}
/// FreeRTOS task body that plays queued audio through an ESP-ADF pipeline.
///
/// @param params pointer to the owning ESPADFSpeaker.
///
/// The task builds a raw-stream -> i2s-writer pipeline, then forwards every
/// DataEvent from buffer_queue_ to the raw stream. It leaves the playback loop
/// on a stop event or after 100 ms without audio, tears the pipeline down and
/// reports each phase on event_queue_. It never returns: after sending STOPPED
/// it idles until the main loop deletes it.
void ESPADFSpeaker::player_task(void *params) {
  ESPADFSpeaker *this_speaker = (ESPADFSpeaker *) params;

  TaskEvent event;
  event.type = TaskEventType::STARTING;
  xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY);

  i2s_driver_config_t i2s_config = {
      .mode = (i2s_mode_t) (I2S_MODE_MASTER | I2S_MODE_TX),
      .sample_rate = 16000,
      .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
      .channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
      .communication_format = I2S_COMM_FORMAT_STAND_I2S,
      .intr_alloc_flags = ESP_INTR_FLAG_LEVEL2 | ESP_INTR_FLAG_IRAM,
      .dma_buf_count = 8,
      .dma_buf_len = 1024,
      .use_apll = false,
      .tx_desc_auto_clear = true,
      .fixed_mclk = 0,
      .mclk_multiple = I2S_MCLK_MULTIPLE_DEFAULT,
      .bits_per_chan = I2S_BITS_PER_CHAN_DEFAULT,
  };

  audio_pipeline_cfg_t pipeline_cfg = {
      .rb_size = 8 * 1024,
  };
  audio_pipeline_handle_t pipeline = audio_pipeline_init(&pipeline_cfg);

  i2s_stream_cfg_t i2s_cfg = {
      .type = AUDIO_STREAM_WRITER,
      .i2s_config = i2s_config,
      .i2s_port = I2S_NUM_0,
      .use_alc = false,
      .volume = 0,
      .out_rb_size = I2S_STREAM_RINGBUFFER_SIZE,
      .task_stack = I2S_STREAM_TASK_STACK,
      .task_core = I2S_STREAM_TASK_CORE,
      .task_prio = I2S_STREAM_TASK_PRIO,
      .stack_in_ext = false,
      .multi_out_num = 0,
      .uninstall_drv = true,
      .need_expand = false,
      .expand_src_bits = I2S_BITS_PER_SAMPLE_16BIT,
  };
  audio_element_handle_t i2s_stream_writer = i2s_stream_init(&i2s_cfg);

  rsp_filter_cfg_t rsp_cfg = {
      .src_rate = 16000,
      .src_ch = 1,
      .dest_rate = 16000,
      .dest_bits = 16,
      .dest_ch = 2,
      .src_bits = 16,
      .mode = RESAMPLE_DECODE_MODE,
      .max_indata_bytes = RSP_FILTER_BUFFER_BYTE,
      .out_len_bytes = RSP_FILTER_BUFFER_BYTE,
      .type = ESP_RESAMPLE_TYPE_AUTO,
      .complexity = 2,
      .down_ch_idx = 0,
      .prefer_flag = ESP_RSP_PREFER_TYPE_SPEED,
      .out_rb_size = RSP_FILTER_RINGBUFFER_SIZE,
      .task_stack = RSP_FILTER_TASK_STACK,
      .task_core = RSP_FILTER_TASK_CORE,
      .task_prio = RSP_FILTER_TASK_PRIO,
      .stack_in_ext = true,
  };
  audio_element_handle_t filter = rsp_filter_init(&rsp_cfg);

  raw_stream_cfg_t raw_cfg = {
      .type = AUDIO_STREAM_WRITER,
      .out_rb_size = 8 * 1024,
  };
  audio_element_handle_t raw_write = raw_stream_init(&raw_cfg);

  audio_pipeline_register(pipeline, raw_write, "raw");
  audio_pipeline_register(pipeline, filter, "filter");
  audio_pipeline_register(pipeline, i2s_stream_writer, "i2s");

  // The filter is registered but currently left out of the link chain.
  const char *link_tag[3] = {
      "raw",
      // "filter",
      "i2s",
  };
  audio_pipeline_link(pipeline, &link_tag[0], 2);

  audio_pipeline_run(pipeline);

  DataEvent data_event;

  event.type = TaskEventType::STARTED;
  xQueueSend(this_speaker->event_queue_, &event, 0);

  uint32_t last_received = millis();

  while (true) {
    if (xQueueReceive(this_speaker->buffer_queue_, &data_event, 0) != pdTRUE) {
      if (millis() - last_received > 100) {
        // No audio for 100ms, stop
        break;
      } else {
        continue;
      }
    }
    if (data_event.stop) {
      // Stop signal from main thread
      while (xQueueReceive(this_speaker->buffer_queue_, &data_event, 0) == pdTRUE) {
        // Flush queue
      }
      break;
    }

    size_t remaining = data_event.len;
    size_t current = 0;
    if (remaining > 0)
      last_received = millis();

    bool write_failed = false;
    while (remaining > 0) {
      int bytes_written = raw_stream_write(raw_write, (char *) data_event.data + current, remaining);
      if (bytes_written <= 0) {
        // Any non-positive result means no progress. Subtracting a negative
        // code from the unsigned counters would run the loop past the end of
        // data_event.data, and retrying forever would keep a stop request
        // from ever being seen, so report the problem and drop this chunk.
        event = {.type = TaskEventType::WARNING, .err = ESP_FAIL};
        xQueueSend(this_speaker->event_queue_, &event, 0);
        write_failed = true;
        break;
      }
      remaining -= bytes_written;
      current += bytes_written;
    }

    if (!write_failed) {
      event.type = TaskEventType::PLAYING;
      xQueueSend(this_speaker->event_queue_, &event, 0);
    }
  }

  audio_pipeline_stop(pipeline);
  audio_pipeline_wait_for_stop(pipeline);
  audio_pipeline_terminate(pipeline);

  event.type = TaskEventType::STOPPING;
  xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY);

  audio_pipeline_unregister(pipeline, i2s_stream_writer);
  audio_pipeline_unregister(pipeline, filter);
  audio_pipeline_unregister(pipeline, raw_write);

  audio_pipeline_deinit(pipeline);
  audio_element_deinit(i2s_stream_writer);
  audio_element_deinit(filter);
  audio_element_deinit(raw_write);

  event.type = TaskEventType::STOPPED;
  xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY);

  // Idle until watch_() deletes this task after it has seen STOPPED.
  while (true) {
    delay(10);
  }
}
void ESPADFSpeaker::stop() {
if (this->state_ == speaker::STATE_STOPPED)
return;
if (this->state_ == speaker::STATE_STARTING) {
this->state_ = speaker::STATE_STOPPED;
return;
}
this->state_ = speaker::STATE_STOPPING;
DataEvent data;
data.stop = true;
xQueueSendToFront(this->buffer_queue_, &data, portMAX_DELAY);
}
/// Handle at most one pending status event from the player task.
///
/// PLAYING clears the warning status, WARNING logs the error and sets it, and
/// STOPPED releases the parent lock, marks the speaker stopped and deletes the
/// task. The remaining event types need no action.
void ESPADFSpeaker::watch_() {
  TaskEvent event;
  if (xQueueReceive(this->event_queue_, &event, 0) != pdTRUE)
    return;  // No event waiting.

  switch (event.type) {
    case TaskEventType::PLAYING:
      this->status_clear_warning();
      break;

    case TaskEventType::WARNING:
      ESP_LOGW(TAG, "Error writing to pipeline: %s", esp_err_to_name(event.err));
      this->status_set_warning();
      break;

    case TaskEventType::STOPPED:
      this->parent_->unlock();
      this->state_ = speaker::STATE_STOPPED;
      vTaskDelete(this->player_task_handle_);
      this->player_task_handle_ = nullptr;
      break;

    case TaskEventType::STARTING:
    case TaskEventType::STARTED:
    case TaskEventType::STOPPING:
      break;
  }
}
/// Drive the speaker state machine once per main-loop iteration.
///
/// STARTING launches the player task. RUNNING and STOPPING both poll the
/// task's event queue. Polling while STOPPING is required: stop() moves the
/// state to STOPPING, and only the STOPPED event handled by watch_() releases
/// the parent lock, deletes the task and returns the state to STOPPED.
void ESPADFSpeaker::loop() {
  switch (this->state_) {
    case speaker::STATE_STARTING:
      this->start_();
      break;
    case speaker::STATE_RUNNING:
    case speaker::STATE_STOPPING:
      this->watch_();
      break;
    case speaker::STATE_STOPPED:
      break;
  }
}
size_t ESPADFSpeaker::play(const uint8_t *data, size_t length) {
if (this->state_ != speaker::STATE_RUNNING && this->state_ != speaker::STATE_STARTING) {
this->start();
}
size_t remaining = length;
size_t index = 0;
while (remaining > 0) {
DataEvent event;
event.stop = false;
size_t to_send_length = std::min(remaining, BUFFER_SIZE);
event.len = to_send_length;
memcpy(event.data, data + index, to_send_length);
if (xQueueSend(this->buffer_queue_, &event, 0) != pdTRUE) {
return index; // Queue full
}
remaining -= to_send_length;
index += to_send_length;
}
return index;
}
} // namespace esp_adf
} // namespace esphome
#endif // USE_ESP32

View File

@ -0,0 +1,68 @@
#pragma once
#ifdef USE_ESP32
#include "../esp_adf.h"
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/component.h"
#include "esphome/core/helpers.h"
#include <audio_element.h>
#include <audio_pipeline.h>
namespace esphome {
namespace esp_adf {
// Maximum number of audio bytes carried by a single DataEvent.
static const size_t BUFFER_SIZE = 1024;
/// Kinds of status message the player task reports to the main loop.
/// Values are spelled out; they match the implicit numbering used before.
enum class TaskEventType : uint8_t {
  STARTING = 0,
  STARTED = 1,
  PLAYING = 2,
  STOPPING = 3,
  STOPPED = 4,
  WARNING = 255,
};
/// Status message sent from the player task to the main loop through event_queue_.
struct TaskEvent {
TaskEventType type;
// Error code; the player task fills it in for WARNING events.
esp_err_t err;
};
/// Message sent to the player task through buffer_queue_: either one chunk of audio
/// (from play()) or a stop request (from stop()).
struct DataEvent {
// True for a stop request; the player task then flushes the queue and leaves its loop.
bool stop;
// Number of valid bytes in data.
size_t len;
uint8_t data[BUFFER_SIZE];
};
/// Speaker that plays audio through an ESP-ADF pipeline running in its own task.
///
/// play() and stop() post DataEvent messages to buffer_queue_; the player task
/// consumes them and reports progress on event_queue_, which loop() polls.
class ESPADFSpeaker : public ESPADFPipeline, public speaker::Speaker, public Component {
 public:
  float get_setup_priority() const override { return esphome::setup_priority::LATE; }
  void setup() override;
  void loop() override;

  void start() override;
  void stop() override;

  size_t play(const uint8_t *data, size_t length) override;

 protected:
  void start_();
  void watch_();

  static void player_task(void *params);

  TaskHandle_t player_task_handle_{nullptr};
  // Created in setup(); null-initialised so they never hold an indeterminate
  // value beforehand.
  QueueHandle_t buffer_queue_{nullptr};
  QueueHandle_t event_queue_{nullptr};
};
} // namespace esp_adf
} // namespace esphome
#endif // USE_ESP32

View File

@ -62,6 +62,7 @@
// IDF-specific feature flags
#ifdef USE_ESP_IDF
#define USE_MQTT_IDF_ENQUEUE
#define USE_ESP_ADF
#endif
// ESP32-specific feature flags