Skip to content

Commit ec7c541

Browse files
committed
Add RingBuffer to esp32-m5stack-atoms3r
Playing + Decoding in different threads gets us decent audio quality
1 parent 973b72b commit ec7c541

File tree

1 file changed

+51
-11
lines changed

1 file changed

+51
-11
lines changed

esp32-m5stack-atoms3r/src/media.cpp

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818

1919
#include "driver/i2c_master.h"
2020
#include <driver/i2s_std.h>
21+
#include "freertos/ringbuf.h"
2122

2223
#include "main.h"
2324

2425
#define SAMPLE_RATE (16000)
2526

2627
#define OPUS_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode
2728
#define PCM_BUFFER_SIZE 640
29+
#define PLAY_BUFFER_SIZE 50
2830

2931
#define OPUS_ENCODER_BITRATE 30000
3032
#define OPUS_ENCODER_COMPLEXITY 0
@@ -148,16 +150,11 @@ void configure_es8311(void) {
148150
ESP_ERROR_CHECK(esp_codec_dev_set_out_vol(audio_dev, 100));
149151
}
150152

151-
int Read(void* dest, int size) {
153+
int record_audio(void* dest, int size) {
152154
ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_read(audio_dev, (void*)dest, size));
153155
return size;
154156
}
155157

156-
int Write(const void* data, int size) {
157-
ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_write(audio_dev, (void*)data, size));
158-
return size;
159-
}
160-
161158
void pipecat_init_audio_capture() {
162159
i2c_master_bus_config_t i2c_bus_cfg = {
163160
.i2c_port = I2C_NUM_1,
@@ -177,7 +174,41 @@ void pipecat_init_audio_capture() {
177174
}
178175

179176
// Scratch PCM buffer that opus_decode() writes into; allocated with
// malloc(PCM_BUFFER_SIZE) in pipecat_init_audio_decoder().
opus_int16 *decoder_buffer = NULL;
177+
// Ring buffer carrying decoded PCM chunks from pipecat_audio_decode() (sender)
// to play_task() (receiver).
RingbufHandle_t decoder_buffer_queue;
178+
// Count of chunks play_task() has received since the last reset; incremented by
// play_task() and set back to 0 by set_is_playing() when playback stops.
std::atomic<int> play_task_buffer_idx = 0;
179+
180+
/**
 * Push PCM bytes to the codec device for playback.
 *
 * @param data PCM bytes to play; the buffer is not modified by this function.
 * @param size Number of bytes to write.
 * @return Always `size`. A failed write is reported through
 *         ESP_ERROR_CHECK_WITHOUT_ABORT but is not reflected in the return value.
 */
int play_audio(const void* data, int size) {
  const int submitted_bytes = size;

  // The codec API takes a non-const pointer, hence the cast.
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_write(audio_dev, (void*)data, size));

  return submitted_bytes;
}
184+
185+
static void play_task(void *arg) {
186+
size_t len;
187+
uint8_t *play_task_buffer[PLAY_BUFFER_SIZE + 1] = {0};
188+
189+
while (1) {
190+
auto audio_buffer = (uint8_t *) xRingbufferReceive(decoder_buffer_queue, &len, portMAX_DELAY);
191+
play_task_buffer_idx++;
192+
193+
if (play_task_buffer_idx < PLAY_BUFFER_SIZE) {
194+
play_task_buffer[play_task_buffer_idx] = audio_buffer;
195+
continue;
196+
}
197+
198+
if (play_task_buffer_idx == PLAY_BUFFER_SIZE) {
199+
for (auto i = 1; i < PLAY_BUFFER_SIZE; i++) {
200+
play_audio(play_task_buffer[i], PCM_BUFFER_SIZE);
201+
vRingbufferReturnItem(decoder_buffer_queue, play_task_buffer[i]);
202+
}
203+
}
204+
205+
play_audio(audio_buffer, PCM_BUFFER_SIZE);
206+
vRingbufferReturnItem(decoder_buffer_queue, audio_buffer);
207+
}
208+
}
209+
180210
OpusDecoder *opus_decoder = NULL;
211+
StaticRingbuffer_t rb_struct;
181212

182213
void pipecat_init_audio_decoder() {
183214
int decoder_error = 0;
@@ -188,10 +219,15 @@ void pipecat_init_audio_decoder() {
188219
}
189220

190221
decoder_buffer = (opus_int16 *)malloc(PCM_BUFFER_SIZE);
222+
223+
auto ring_buffer_size = PCM_BUFFER_SIZE * (PLAY_BUFFER_SIZE) + (PLAY_BUFFER_SIZE * 10);
224+
decoder_buffer_queue = xRingbufferCreateStatic(ring_buffer_size, RINGBUF_TYPE_NOSPLIT, (uint8_t *) malloc(ring_buffer_size), &rb_struct);
225+
xTaskCreate(play_task, "play_task", 4096, NULL, 5, NULL);
191226
}
192227

193228

194229
std::atomic<bool> is_playing = false;
230+
195231
unsigned int silence_count = 0;
196232

197233
void set_is_playing(int16_t *in_buf, size_t in_samples) {
@@ -210,18 +246,22 @@ void set_is_playing(int16_t *in_buf, size_t in_samples) {
210246

211247
if (silence_count >= 20 && is_playing) {
212248
is_playing = false;
249+
play_task_buffer_idx = 0;
213250
} else if (any_set && !is_playing) {
214251
is_playing = true;
215252
}
216253
}
217254

218-
219255
void pipecat_audio_decode(uint8_t *data, size_t size) {
220-
int decoded_size =
221-
opus_decode(opus_decoder, data, size, decoder_buffer, PCM_BUFFER_SIZE, 0);
256+
int decoded_size = opus_decode(opus_decoder, data, size, decoder_buffer, PCM_BUFFER_SIZE, 0);
222257

223258
if (decoded_size > 0) {
224-
Write(decoder_buffer, PCM_BUFFER_SIZE);
259+
set_is_playing(decoder_buffer, decoded_size);
260+
if (!is_playing) {
261+
return;
262+
}
263+
264+
xRingbufferSend(decoder_buffer_queue, decoder_buffer, PCM_BUFFER_SIZE, 0);
225265
}
226266
}
227267

@@ -257,7 +297,7 @@ void pipecat_send_audio(PeerConnection *peer_connection) {
257297
memset(read_buffer, 0, PCM_BUFFER_SIZE);
258298
vTaskDelay(pdMS_TO_TICKS(20));
259299
} else {
260-
// Do record
300+
record_audio(read_buffer, PCM_BUFFER_SIZE);
261301
}
262302

263303
auto encoded_size = opus_encode(opus_encoder, (const opus_int16 *)read_buffer,

0 commit comments

Comments
 (0)