Streaming audio chunks from API to XIAO esp32s3 with MAX98357 Amp

Hello everyone,

I’m facing challenges getting the I2S to work correctly for a project where I stream audio to a XIAO ESP32S3 and play it through a Max98357A amplifier. I’m in need of examples or guidance specifically for setting up the I2S configuration for this setup. Any shared experiences or resources would be incredibly helpful.

Thank you in advance!

Hi there,
Post the code you have and we can assist with guidance. I2S is used for high-quality audio, and there are several examples of the hardware connections and software. AFAIK it has been reported to deliver on quality and to work with several amp chips; I even recall the MAX chip being used as well.
HTH
GL :slight_smile: PJ

1 Like

Thanks for your reply PJ_Glasse. I’ve been working with this so far to send a .wav clip to a server I have set up. I’m now experimenting with some simpler code just to get the MAX working, but have not yet had any luck. Once I figure it out, I will implement it in a flow like this:

  1. Hold button and record audio
  2. Append .wav header
  3. Send to API
  4. API transcribes the audio to text - generates a response as audio using chunked transfer - sends the audio back in chunks (currently it just sends back the text response)
  5. (This is the step I have yet to implement.) Audio is played back through the MAX chip connected to a speaker.
#include <I2S.h>
#include <WiFi.h>
#include <HTTPClient.h>

// WiFi credentials
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording format handed to I2S.begin(): sample rate in Hz and bits per sample.
#define SAMPLE_RATE        8000U
#define SAMPLE_BITS        16
#define MAX_RECORD_TIME    60  // Maximum record time in seconds
#define BUTTON_PIN         4   // Button connected to pin 4
// Bytes reserved at the start of audioBuffer for the WAV header.
#define WAV_HEADER_SIZE    44

// Adjust the buffer size to accommodate maximum recording time
// (bytes per second * seconds of audio, plus the WAV header).
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

// Shared HTTP client used by send_audio_data().
HTTPClient http;
// Recording buffer; allocated with ps_malloc() in setup().
uint8_t *audioBuffer = nullptr;
// Last button state accepted by button_isr_handler() (true while pressed).
bool isRecording = false;
// Set by record_audio_task() when a finished recording is ready to upload.
bool sendPostFlag = false;
// millis() timestamp of the last accepted button edge, and the minimum gap
// between accepted edges, both in milliseconds.
unsigned long lastDebounceTime = 0;
const unsigned long debounceDelay = 100;
// Write position inside audioBuffer (header bytes included).
size_t audioBufferIndex = 0;
// Carries bool start/stop requests from the button ISR to record_audio_task().
QueueHandle_t xQueue;

// Function prototypes
void setup_wifi();
void setup_button();
void setup_i2s();
void IRAM_ATTR button_isr_handler();
void record_audio_task(void *param);
void send_audio_data(uint8_t *data, size_t length);
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
  Serial.begin(115200);
  while (!Serial);

  audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
  if (audioBuffer == nullptr) {
    Serial.println("Failed to allocate memory for audio buffer");
    return;
  }

  setup_wifi();
  setup_button();
  setup_i2s();

  xQueue = xQueueCreate(10, sizeof(bool));
  xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

void loop() {
  // Main loop does nothing, operation is interrupt and task driven
}

/**
 * Start the WiFi connection using ssid/password and block until the station
 * reports WL_CONNECTED, printing a progress line every 500 ms.
 */
void setup_wifi() {
  WiFi.begin(ssid, password);
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.println("Connecting to WiFi...");
  }
  Serial.println("Connected to WiFi");
}

void setup_button() {
    pinMode(BUTTON_PIN, INPUT_PULLUP);
    attachInterrupt(digitalPinToInterrupt(BUTTON_PIN), button_isr_handler, CHANGE);
}

/**
 * Start the I2S peripheral in PDM mono mode at SAMPLE_RATE / SAMPLE_BITS.
 * Halts forever if the driver cannot be started.
 */
void setup_i2s() {
  // Only two pins are assigned (42 and 41); the rest stay unassigned (-1).
  // NOTE(review): presumably these are the on-board PDM mic clock/data pins - confirm.
  I2S.setAllPins(-1, 42, 41, -1, -1);
  const bool started = I2S.begin(PDM_MONO_MODE, SAMPLE_RATE, SAMPLE_BITS);
  if (started) {
    return;
  }
  Serial.println("Failed to initialize I2S!");
  for (;;) {
  }
}

/**
 * Button edge interrupt: debounces the input and, when the pressed state
 * differs from isRecording, updates isRecording and posts the new state to
 * xQueue for record_audio_task().
 */
void IRAM_ATTR button_isr_handler() {
    // An edge can arrive before the queue has been created; posting to a
    // null handle would crash, so drop the event instead.
    if (xQueue == nullptr) {
        return;
    }

    const unsigned long now = millis();
    if (now - lastDebounceTime <= debounceDelay) {
        return;  // still inside the debounce window
    }

    // Pull-up input: LOW means the button is held down.
    const bool pressed = (digitalRead(BUTTON_PIN) == LOW);
    if (pressed == isRecording) {
        return;  // no state change to report
    }

    isRecording = pressed;
    lastDebounceTime = now;

    BaseType_t higherPriorityTaskWoken = pdFALSE;
    xQueueSendFromISR(xQueue, &isRecording, &higherPriorityTaskWoken);
    // Let a task unblocked by the post run as soon as the ISR returns.
    if (higherPriorityTaskWoken == pdTRUE) {
        portYIELD_FROM_ISR();
    }
}

void record_audio_task(void *param) {
    bool shouldRecord = false;
    bool currentlyRecording = false;
    Serial.println("Record audio task started.");

    while (true) {
        // Check for recording state updates
        while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
            if (shouldRecord && !currentlyRecording) {
                currentlyRecording = true;
                Serial.println("Starting recording...");
                audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording, reserving space for WAV header
            } else if (!shouldRecord && currentlyRecording) {
                currentlyRecording = false;
                Serial.println("Stopping recording.");
                // Update WAV header with actual data size and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Set flag to indicate data is ready to be sent
            }
        }

        if (currentlyRecording) {
            size_t bytesRead = 0;
            TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

            // Attempt to read audio data from I2S with minimal blocking
            esp_err_t result = esp_i2s::i2s_read(esp_i2s::I2S_NUM_0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

            if (result == ESP_OK && bytesRead > 0) {
                audioBufferIndex += bytesRead;
                // Check for buffer overflow
                if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                    currentlyRecording = false;
                    Serial.println("Max recording length reached, stopping recording.");
                    // Update WAV header with actual data size and prepare to send data
                    generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                    sendPostFlag = true;  // Set flag to indicate data is ready to be sent
                }
            }

            // Immediately check the queue again to see if recording should stop
            if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
                currentlyRecording = false;
                Serial.println("Stopping recording via queue message.");
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Prepare to send data
            }

            // Use a short delay to yield to other tasks
            vTaskDelay(1 / portTICK_PERIOD_MS);
        } else {
            // If not recording, check less frequently
            vTaskDelay(10 / portTICK_PERIOD_MS);
        }

        // Check if the audio data is ready to be sent
        if (sendPostFlag) {
            send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
            sendPostFlag = false;  // Reset the flag
        }
    }
}

/**
 * POST a recorded WAV buffer to the audio API and print the response.
 *
 * @param data   Start of the WAV file in memory (header included).
 * @param length Number of bytes to send.
 */
void send_audio_data(uint8_t *data, size_t length) {
  if (WiFi.status() != WL_CONNECTED) {
    Serial.println("Error in WiFi connection");
    return;
  }

  // Begin every request explicitly so the URL and headers are always set,
  // whatever state the previous request left the shared client in.
  // http.begin("https://audio-server.deno.dev/api/audio");
  if (!http.begin("http://192.168.1.137:8000/api/audio")) {
    Serial.println("Failed to initialise HTTP connection");
    return;
  }
  http.addHeader("Content-Type", "audio/wav");
  http.setTimeout(30000); // Set timeout before the request

  Serial.println("Sending audio data...");
  const int httpResponseCode = http.POST(data, length);

  if (httpResponseCode > 0) {
    Serial.print("HTTP Response code: ");
    Serial.println(httpResponseCode);
    String payload = http.getString();
    Serial.println(payload);
    Serial.println("");
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }

  // Release the request's resources; without this the client never ends.
  http.end();
}

/**
 * Write a 44-byte canonical PCM WAV (RIFF) header to the start of a buffer.
 *
 * The header describes mono PCM audio with SAMPLE_BITS bits per sample; all
 * multi-byte fields are stored little-endian.
 *
 * @param wav_header  Destination; must have room for WAV_HEADER_SIZE bytes.
 *                    A null pointer is ignored.
 * @param wav_size    Size in bytes of the audio payload following the header.
 * @param sample_rate Sampling rate in Hz; written to SampleRate and used to
 *                    derive ByteRate.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
  if (wav_header == nullptr) {
    return;
  }

  const uint16_t num_channels = 1;
  const uint16_t bits_per_sample = SAMPLE_BITS;
  const uint16_t block_align = num_channels * bits_per_sample / 8;
  // RIFF ChunkSize counts everything after the 8-byte "RIFF" + size prefix.
  const uint32_t file_size = wav_size + WAV_HEADER_SIZE - 8;
  // Derived from the argument so SampleRate and ByteRate always agree.
  const uint32_t byte_rate = sample_rate * block_align;

  // Extract one byte of a value; the explicit cast avoids an implicit
  // narrowing conversion inside the braced initializer below.
  auto byte_of = [](uint32_t value, unsigned shift) -> uint8_t {
    return static_cast<uint8_t>((value >> shift) & 0xFFu);
  };

  const uint8_t set_wav_header[] = {
    'R', 'I', 'F', 'F', // ChunkID
    byte_of(file_size, 0), byte_of(file_size, 8), byte_of(file_size, 16), byte_of(file_size, 24), // ChunkSize
    'W', 'A', 'V', 'E', // Format
    'f', 'm', 't', ' ', // Subchunk1ID
    0x10, 0x00, 0x00, 0x00, // Subchunk1Size (16 for PCM)
    0x01, 0x00, // AudioFormat (1 for PCM)
    byte_of(num_channels, 0), byte_of(num_channels, 8), // NumChannels
    byte_of(sample_rate, 0), byte_of(sample_rate, 8), byte_of(sample_rate, 16), byte_of(sample_rate, 24), // SampleRate
    byte_of(byte_rate, 0), byte_of(byte_rate, 8), byte_of(byte_rate, 16), byte_of(byte_rate, 24), // ByteRate
    byte_of(block_align, 0), byte_of(block_align, 8), // BlockAlign
    byte_of(bits_per_sample, 0), byte_of(bits_per_sample, 8), // BitsPerSample
    'd', 'a', 't', 'a', // Subchunk2ID
    byte_of(wav_size, 0), byte_of(wav_size, 8), byte_of(wav_size, 16), byte_of(wav_size, 24), // Subchunk2Size
  };
  static_assert(sizeof(set_wav_header) == WAV_HEADER_SIZE,
                "WAV header layout must match WAV_HEADER_SIZE");
  memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}

I’m wondering how to set up the i2s config for both the audio recording and playback, but have not gotten to change any of the i2s settings for the XIAO esp32s3 board…

Any help or links would be greatly appreciated!

I’ll happily share the API code, but it is written in Deno. Let me know if that is of interest!

1 Like

Hi there,
Looks Great , You got a good start, Here is Link , I have seen this setup Work posted by two different users. Sound quality is Very good they Reported.
“The lab has been tested with two different amps, the UDA1334 breakout board from Adafruit, and the MAX98357A breakout board from Sparkfun.”
https://itp.nyu.edu/physcomp/lab-playing-wav-files-from-an-arduino/
It’s one of the Most Comprehensive you’ll Find!! :wink: :+1:
HTH
GL :slight_smile: PJ

Thanks for the guide!

I ran into a bunch of errors along the way and am not sure how to fix them. The ArduinoSound library had some missing dependencies, and I did not fully understand how to adapt this to the XIAO ESP32S3, as it seems that I need the esp_i2s:: namespace before the I2S functions.

This is where I ended up. It seems that the audio is being received and logged (as binary) to the serial monitor, but I am not able to play it through the speaker; I get this error: E (19591) I2S: register I2S object to platform failed

#include <I2S.h>
#include <WiFi.h>
#include <HTTPClient.h>

// WiFi credentials (placeholders - never publish real network credentials)
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording format handed to I2S.begin(): sample rate in Hz and bits per sample.
#define SAMPLE_RATE        8000U
#define SAMPLE_BITS        16
#define MAX_RECORD_TIME    60  // Maximum record time in seconds
#define BUTTON_PIN         4   // Button connected to pin 4
// Bytes reserved at the start of audioBuffer for the WAV header.
#define WAV_HEADER_SIZE    44

// Adjust the buffer size to accommodate maximum recording time
// (bytes per second * seconds of audio, plus the WAV header).
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

// Shared HTTP client used by send_audio_data() and handle_api_response().
HTTPClient http;
// Recording buffer; allocated with ps_malloc() in setup().
uint8_t *audioBuffer = nullptr;
// Last button state accepted by button_isr_handler() (true while pressed).
bool isRecording = false;
// Set by record_audio_task() when a finished recording is ready to upload.
bool sendPostFlag = false;
// millis() timestamp of the last accepted button edge, and the minimum gap
// between accepted edges, both in milliseconds.
unsigned long lastDebounceTime = 0;
const unsigned long debounceDelay = 100;
// Write position inside audioBuffer (header bytes included).
size_t audioBufferIndex = 0;
// Carries bool start/stop requests from the button ISR to record_audio_task().
QueueHandle_t xQueue;

// Function prototypes
void setup_wifi();
void setup_button();
void setup_i2s();
void IRAM_ATTR button_isr_handler();
void record_audio_task(void *param);
void send_audio_data(uint8_t *data, size_t length);
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
  Serial.begin(115200);
  while (!Serial);

  audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
  if (audioBuffer == nullptr) {
    Serial.println("Failed to allocate memory for audio buffer");
    return;
  }

  setup_wifi();
  setup_button();
  setup_i2s();

  xQueue = xQueueCreate(10, sizeof(bool));
  xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

void loop() {
  // Main loop does nothing, operation is interrupt and task driven
}

/**
 * Start the WiFi connection using ssid/password and block until the station
 * reports WL_CONNECTED, printing a progress line every 500 ms.
 */
void setup_wifi() {
  WiFi.begin(ssid, password);
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.println("Connecting to WiFi...");
  }
  Serial.println("Connected to WiFi");
}

void setup_button() {
    pinMode(BUTTON_PIN, INPUT_PULLUP);
    attachInterrupt(digitalPinToInterrupt(BUTTON_PIN), button_isr_handler, CHANGE);
}

/**
 * Start the I2S peripheral in PDM mono mode at SAMPLE_RATE / SAMPLE_BITS.
 * Halts forever if the driver cannot be started.
 */
void setup_i2s() {
  // Only two pins are assigned (42 and 41); the rest stay unassigned (-1).
  // NOTE(review): presumably these are the on-board PDM mic clock/data pins - confirm.
  I2S.setAllPins(-1, 42, 41, -1, -1);
  const bool started = I2S.begin(PDM_MONO_MODE, SAMPLE_RATE, SAMPLE_BITS);
  if (started) {
    return;
  }
  Serial.println("Failed to initialize I2S!");
  for (;;) {
  }
}

/**
 * Button edge interrupt: debounces the input and, when the pressed state
 * differs from isRecording, updates isRecording and posts the new state to
 * xQueue for record_audio_task().
 */
void IRAM_ATTR button_isr_handler() {
    // An edge can arrive before the queue has been created; posting to a
    // null handle would crash, so drop the event instead.
    if (xQueue == nullptr) {
        return;
    }

    const unsigned long now = millis();
    if (now - lastDebounceTime <= debounceDelay) {
        return;  // still inside the debounce window
    }

    // Pull-up input: LOW means the button is held down.
    const bool pressed = (digitalRead(BUTTON_PIN) == LOW);
    if (pressed == isRecording) {
        return;  // no state change to report
    }

    isRecording = pressed;
    lastDebounceTime = now;

    BaseType_t higherPriorityTaskWoken = pdFALSE;
    xQueueSendFromISR(xQueue, &isRecording, &higherPriorityTaskWoken);
    // Let a task unblocked by the post run as soon as the ISR returns.
    if (higherPriorityTaskWoken == pdTRUE) {
        portYIELD_FROM_ISR();
    }
}

void record_audio_task(void *param) {
    bool shouldRecord = false;
    bool currentlyRecording = false;
    Serial.println("Record audio task started.");

    while (true) {
        // Check for recording state updates
        while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
            if (shouldRecord && !currentlyRecording) {
                currentlyRecording = true;
                Serial.println("Starting recording...");
                audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording, reserving space for WAV header
            } else if (!shouldRecord && currentlyRecording) {
                currentlyRecording = false;
                Serial.println("Stopping recording.");
                // Update WAV header with actual data size and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Set flag to indicate data is ready to be sent
            }
        }

        if (currentlyRecording) {
            size_t bytesRead = 0;
            TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

            // Attempt to read audio data from I2S with minimal blocking
            esp_err_t result = esp_i2s::i2s_read(esp_i2s::I2S_NUM_0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

            if (result == ESP_OK && bytesRead > 0) {
                audioBufferIndex += bytesRead;
                // Check for buffer overflow
                if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                    currentlyRecording = false;
                    Serial.println("Max recording length reached, stopping recording.");
                    // Update WAV header with actual data size and prepare to send data
                    generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                    sendPostFlag = true;  // Set flag to indicate data is ready to be sent
                }
            }

            // Immediately check the queue again to see if recording should stop
            if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
                currentlyRecording = false;
                Serial.println("Stopping recording via queue message.");
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Prepare to send data
            }

            // Use a short delay to yield to other tasks
            vTaskDelay(1 / portTICK_PERIOD_MS);
        } else {
            // If not recording, check less frequently
            vTaskDelay(10 / portTICK_PERIOD_MS);
        }

        // Check if the audio data is ready to be sent
        if (sendPostFlag) {
            send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
            sendPostFlag = false;  // Reset the flag
        }
    }
}

/**
 * POST a recorded WAV buffer to the audio API, print the response and then
 * hand over to handle_api_response() for playback.
 *
 * @param data   Start of the WAV file in memory (header included).
 * @param length Number of bytes to send.
 */
void send_audio_data(uint8_t *data, size_t length) {
  if (WiFi.status() != WL_CONNECTED) {
    Serial.println("Error in WiFi connection");
    return;
  }

  // Begin every request explicitly so the URL and headers are always set,
  // whatever state the previous request left the shared client in.
  // http.begin("https://audio-server.deno.dev/api/audio");
  if (!http.begin("http://192.168.1.137:8000/api/audio")) {
    Serial.println("Failed to initialise HTTP connection");
    return;
  }
  http.addHeader("Content-Type", "audio/wav");
  http.setTimeout(30000); // Set timeout before the request

  Serial.println("Sending audio data...");
  const int httpResponseCode = http.POST(data, length);

  if (httpResponseCode > 0) {
    Serial.print("HTTP Response code: ");
    Serial.println(httpResponseCode);
    String payload = http.getString();
    Serial.println(payload);
    Serial.println("");

    // Call handle_api_response here if you expect a specific response after POST
    handle_api_response();
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }

  // Release the request's resources; without this the client never ends.
  http.end();
}

/**
 * Write a 44-byte canonical PCM WAV (RIFF) header to the start of a buffer.
 *
 * The header describes mono PCM audio with SAMPLE_BITS bits per sample; all
 * multi-byte fields are stored little-endian.
 *
 * @param wav_header  Destination; must have room for WAV_HEADER_SIZE bytes.
 *                    A null pointer is ignored.
 * @param wav_size    Size in bytes of the audio payload following the header.
 * @param sample_rate Sampling rate in Hz; written to SampleRate and used to
 *                    derive ByteRate.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
  if (wav_header == nullptr) {
    return;
  }

  const uint16_t num_channels = 1;
  const uint16_t bits_per_sample = SAMPLE_BITS;
  const uint16_t block_align = num_channels * bits_per_sample / 8;
  // RIFF ChunkSize counts everything after the 8-byte "RIFF" + size prefix.
  const uint32_t file_size = wav_size + WAV_HEADER_SIZE - 8;
  // Derived from the argument so SampleRate and ByteRate always agree.
  const uint32_t byte_rate = sample_rate * block_align;

  // Extract one byte of a value; the explicit cast avoids an implicit
  // narrowing conversion inside the braced initializer below.
  auto byte_of = [](uint32_t value, unsigned shift) -> uint8_t {
    return static_cast<uint8_t>((value >> shift) & 0xFFu);
  };

  const uint8_t set_wav_header[] = {
    'R', 'I', 'F', 'F', // ChunkID
    byte_of(file_size, 0), byte_of(file_size, 8), byte_of(file_size, 16), byte_of(file_size, 24), // ChunkSize
    'W', 'A', 'V', 'E', // Format
    'f', 'm', 't', ' ', // Subchunk1ID
    0x10, 0x00, 0x00, 0x00, // Subchunk1Size (16 for PCM)
    0x01, 0x00, // AudioFormat (1 for PCM)
    byte_of(num_channels, 0), byte_of(num_channels, 8), // NumChannels
    byte_of(sample_rate, 0), byte_of(sample_rate, 8), byte_of(sample_rate, 16), byte_of(sample_rate, 24), // SampleRate
    byte_of(byte_rate, 0), byte_of(byte_rate, 8), byte_of(byte_rate, 16), byte_of(byte_rate, 24), // ByteRate
    byte_of(block_align, 0), byte_of(block_align, 8), // BlockAlign
    byte_of(bits_per_sample, 0), byte_of(bits_per_sample, 8), // BitsPerSample
    'd', 'a', 't', 'a', // Subchunk2ID
    byte_of(wav_size, 0), byte_of(wav_size, 8), byte_of(wav_size, 16), byte_of(wav_size, 24), // Subchunk2Size
  };
  static_assert(sizeof(set_wav_header) == WAV_HEADER_SIZE,
                "WAV header layout must match WAV_HEADER_SIZE");
  memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}

void setup_speaker_i2s() {
  // Configure I2S for speaker output
  esp_i2s::i2s_config_t i2s_config = {
      .mode = (esp_i2s::i2s_mode_t)(esp_i2s::I2S_MODE_MASTER | esp_i2s::I2S_MODE_TX),
      .sample_rate = 44100,  // Adjust as needed
      .bits_per_sample = esp_i2s::I2S_BITS_PER_SAMPLE_16BIT,  // Adjust as needed
      .channel_format = esp_i2s::I2S_CHANNEL_FMT_RIGHT_LEFT,  // Adjust as needed
      .communication_format =esp_i2s:: I2S_COMM_FORMAT_I2S,
      .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
      .dma_buf_count = 8,
      .dma_buf_len = 64,
      .use_apll = false,
      .tx_desc_auto_clear = true,
      .fixed_mclk = 0
  };

  esp_i2s::i2s_pin_config_t pin_config = {
      .bck_io_num = 8,
      .ws_io_num = 7,
      .data_out_num = 9,
      .data_in_num = I2S_PIN_NO_CHANGE  // Not used
  };

  esp_i2s::i2s_driver_install(esp_i2s::I2S_NUM_0, &i2s_config, 0, NULL);  // Choose I2S_NUM_0 or I2S_NUM_1
  esp_i2s::i2s_set_pin(esp_i2s::I2S_NUM_0, &pin_config);  // Match the I2S number used above
}

void handle_api_response() {
    setup_speaker_i2s();
    int httpResponseCode = http.GET(); // Use POST or GET according to your request type

    if (httpResponseCode == 200) { // HTTP OK
        Serial.println("Successfully received response from server");

        // Get the response as a stream
        WiFiClient *stream = http.getStreamPtr();

        // Buffer to store chunks of the audio stream
        uint8_t buffer[1024];
        int len = 0;

        // Assuming you've already configured I2S for output elsewhere in your code
        // Read the stream and play audio chunks
        while ((len = stream->readBytes(buffer, sizeof(buffer))) > 0) {
            size_t bytesWritten;
            // Directly write the audio buffer to the I2S output
            esp_i2s::i2s_write(esp_i2s::I2S_NUM_0, buffer, len, &bytesWritten, portMAX_DELAY);
        }
    } else {
        Serial.print("Failed to get response from server, HTTP Response code: ");
        Serial.println(httpResponseCode);
        // Handle non-200 responses or server errors here
    }
}

Hi there,
That’s great, Check out the Drone bot GUY he’s got this video and some code that can be adapted playing speaker and recording a MIC.
The Pins are the Main issue with Xiao and getting it to GO!, I don’t see the pin define for the I2S interface, BCLK, or WS
look here, https://www.espressif.com/sites/default/files/documentation/esp32-s3_technical_reference_manual_en.pdf
section 28.3

You're close,
HTH
GL :slight_smile: PJ

1 Like

I got the example from Drone bot GUY working, I realised that I had the wrong “Audio.h” library and that this was the right one. Still a lot more to implement, but getting close

EDIT: added code

#include "Arduino.h"
#include "WiFi.h"
#include "Audio.h"
#include "HTTPClient.h"
#include "FS.h"
#include "LittleFS.h"

// Pins passed to audio.setPinout() for the I2S amplifier:
// data out, bit clock and left/right (word select) clock.
#define I2S_DOUT  9
#define I2S_BCLK  8
#define I2S_LRC   7

// Audio player object from Audio.h, used for playback in setup()/loop().
Audio audio;

// WiFi credentials (placeholders).
const char* ssid = "SSID";
const char* password = "PASSWORD";

/**
 * Connect to WiFi, mount LittleFS, request an MP3 from the API, store it as
 * /response.mp3 and start playing it.
 *
 * Playback is started only when the server answered with a 2xx status and
 * the body was written to the file.
 */
void setup() {
  Serial.begin(115200);
  WiFi.begin(ssid, password);
  while (WiFi.status() != WL_CONNECTED) {
    delay(500);
    Serial.print(".");
  }
  Serial.println("WiFi connected");

  // Initialize and mount LittleFS
  if (!LittleFS.begin(true)) {
    Serial.println("An error has occurred while mounting LittleFS");
    return;
  }

  // Delete the existing response file if it exists
  if (LittleFS.exists("/response.mp3")) {
    LittleFS.remove("/response.mp3");
    Serial.println("Existing MP3 file deleted");
  }

  // Initialize the I2S output
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
  audio.setVolume(100); // Adjust volume as needed

  HTTPClient http;
  if (!http.begin("http://192.168.1.137:8000/api/audio")) { // Your API endpoint
    Serial.println("Failed to initialise HTTP connection");
    return;
  }
  http.addHeader("Content-Type", "application/json");

  bool mp3Saved = false;
  const int httpResponseCode = http.POST("{\"data\":\"Your data here\"}"); // Send POST request
  if (httpResponseCode >= 200 && httpResponseCode < 300) {
    File file = LittleFS.open("/response.mp3", FILE_WRITE);
    if (file) {
      // writeToStream() reports the number of bytes written, or a negative error.
      const int written = http.writeToStream(&file);
      file.close();
      if (written > 0) {
        mp3Saved = true;
        Serial.println("MP3 saved");
      } else {
        Serial.print("Failed to save response body: ");
        Serial.println(written);
      }
    } else {
      Serial.println("Failed to open file for writing");
    }
  } else if (httpResponseCode > 0) {
    // An error page must not be stored and played as if it were audio.
    Serial.print("Server returned HTTP status: ");
    Serial.println(httpResponseCode);
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }
  http.end();

  // Play the saved MP3 file using LittleFS
  if (mp3Saved) {
    audio.connecttoFS(LittleFS, "/response.mp3");
  }
}

// Arduino main loop: calls audio.loop() on every pass.
// NOTE(review): presumably this is what keeps playback of the file opened
// in setup() running - confirm against the Audio library documentation.
void loop() {
  audio.loop();
}

:grin: :+1:
Nice work, You’ll get it.
Those LIB’s will get you every time…LOL
If the code is Not Proprietary , You can post up and others can comment and It may help others.
The audio is Great if it’s done correctly.
GL :slight_smile: PJ :v:

1 Like

Streaming chunks was not as easy as I had hoped, so for now I wait for the whole file before playing. I’m now struggling to use Audio.h for playback together with I2S.h for recording; they seem to interfere with each other, and I have not found a good solution. This is what I’m working with now, but I get errors from the Audio.h library, seemingly because its I2S types are not found outside the esp_i2s:: namespace…

#include <I2S.h>
#include <WiFi.h>
#include <HTTPClient.h>
#include <Audio.h>
#include <LittleFS.h>

// WiFi credentials
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording format handed to I2S.begin(): sample rate in Hz and bits per sample.
#define SAMPLE_RATE        8000U
#define SAMPLE_BITS        16
#define MAX_RECORD_TIME    60  // Maximum record time in seconds
#define BUTTON_PIN         4   // Button connected to pin 4
// Bytes reserved at the start of audioBuffer for the WAV header.
#define WAV_HEADER_SIZE    44

// Pins passed to audio.setPinout() for the I2S amplifier:
// data out, bit clock and left/right (word select) clock.
#define I2S_DOUT  9
#define I2S_BCLK  8
#define I2S_LRC   7

// Audio player object from Audio.h, used to play the saved response.
Audio audio;

// Adjust the buffer size to accommodate maximum recording time
// (bytes per second * seconds of audio, plus the WAV header).
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

// Shared HTTP client used by send_audio_data().
HTTPClient http;
// Recording buffer; allocated with ps_malloc() in setup().
uint8_t *audioBuffer = nullptr;
// Last button state accepted by button_isr_handler() (true while pressed).
bool isRecording = false;
// Set by record_audio_task() when a finished recording is ready to upload.
bool sendPostFlag = false;
// millis() timestamp of the last accepted button edge, and the minimum gap
// between accepted edges, both in milliseconds.
unsigned long lastDebounceTime = 0;
const unsigned long debounceDelay = 100;
// Write position inside audioBuffer (header bytes included).
size_t audioBufferIndex = 0;
// Carries bool start/stop requests from the button ISR to record_audio_task().
QueueHandle_t xQueue;

// Function prototypes
void setup_wifi();
void setup_button();
void setup_i2s();
void IRAM_ATTR button_isr_handler();
void record_audio_task(void *param);
void send_audio_data(uint8_t *data, size_t length);
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
  Serial.begin(115200);
  while (!Serial);

  audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
  if (audioBuffer == nullptr) {
    Serial.println("Failed to allocate memory for audio buffer");
    return;
  }

  setup_wifi();
  setup_button();
  setup_i2s();
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
  audio.setVolume(100);

  if (!LittleFS.begin(true)) {
    Serial.println("An error has occurred while mounting LittleFS");
    return;
  }

  xQueue = xQueueCreate(10, sizeof(bool));
  xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

// Arduino main loop: calls audio.loop() on every pass.
// NOTE(review): send_audio_data() calls audio.connecttoFS() from the
// recording task while this runs in the loop task - confirm the Audio
// library tolerates being used from two tasks.
void loop() {
  audio.loop();
}

/**
 * Start the WiFi connection using ssid/password and block until the station
 * reports WL_CONNECTED, printing a progress line every 500 ms.
 */
void setup_wifi() {
  WiFi.begin(ssid, password);
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.println("Connecting to WiFi...");
  }
  Serial.println("Connected to WiFi");
}

void setup_button() {
    pinMode(BUTTON_PIN, INPUT_PULLUP);
    attachInterrupt(digitalPinToInterrupt(BUTTON_PIN), button_isr_handler, CHANGE);
}

/**
 * Start the I2S peripheral in PDM mono mode at SAMPLE_RATE / SAMPLE_BITS.
 * Halts forever if the driver cannot be started.
 */
void setup_i2s() {
  // Only two pins are assigned (42 and 41); the rest stay unassigned (-1).
  // NOTE(review): presumably these are the on-board PDM mic clock/data pins - confirm.
  I2S.setAllPins(-1, 42, 41, -1, -1);
  const bool started = I2S.begin(PDM_MONO_MODE, SAMPLE_RATE, SAMPLE_BITS);
  if (started) {
    return;
  }
  Serial.println("Failed to initialize I2S!");
  for (;;) {
  }
}

/**
 * Button edge interrupt: debounces the input and, when the pressed state
 * differs from isRecording, updates isRecording and posts the new state to
 * xQueue for record_audio_task().
 */
void IRAM_ATTR button_isr_handler() {
    // An edge can arrive before the queue has been created; posting to a
    // null handle would crash, so drop the event instead.
    if (xQueue == nullptr) {
        return;
    }

    const unsigned long now = millis();
    if (now - lastDebounceTime <= debounceDelay) {
        return;  // still inside the debounce window
    }

    // Pull-up input: LOW means the button is held down.
    const bool pressed = (digitalRead(BUTTON_PIN) == LOW);
    if (pressed == isRecording) {
        return;  // no state change to report
    }

    isRecording = pressed;
    lastDebounceTime = now;

    BaseType_t higherPriorityTaskWoken = pdFALSE;
    xQueueSendFromISR(xQueue, &isRecording, &higherPriorityTaskWoken);
    // Let a task unblocked by the post run as soon as the ISR returns.
    if (higherPriorityTaskWoken == pdTRUE) {
        portYIELD_FROM_ISR();
    }
}

void record_audio_task(void *param) {
    bool shouldRecord = false;
    bool currentlyRecording = false;
    Serial.println("Record audio task started.");

    while (true) {
        // Check for recording state updates
        while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
            if (shouldRecord && !currentlyRecording) {
                currentlyRecording = true;
                Serial.println("Starting recording...");
                audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording, reserving space for WAV header
            } else if (!shouldRecord && currentlyRecording) {
                currentlyRecording = false;
                Serial.println("Stopping recording.");
                // Update WAV header with actual data size and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Set flag to indicate data is ready to be sent
            }
        }

        if (currentlyRecording) {
            size_t bytesRead = 0;
            TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

            // Attempt to read audio data from I2S with minimal blocking
            esp_err_t result = esp_i2s::i2s_read(esp_i2s::I2S_NUM_0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

            if (result == ESP_OK && bytesRead > 0) {
                audioBufferIndex += bytesRead;
                // Check for buffer overflow
                if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                    currentlyRecording = false;
                    Serial.println("Max recording length reached, stopping recording.");
                    // Update WAV header with actual data size and prepare to send data
                    generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                    sendPostFlag = true;  // Set flag to indicate data is ready to be sent
                }
            }

            // Immediately check the queue again to see if recording should stop
            if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
                currentlyRecording = false;
                Serial.println("Stopping recording via queue message.");
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Prepare to send data
            }

            // Use a short delay to yield to other tasks
            vTaskDelay(1 / portTICK_PERIOD_MS);
        } else {
            // If not recording, check less frequently
            vTaskDelay(10 / portTICK_PERIOD_MS);
        }

        // Check if the audio data is ready to be sent
        if (sendPostFlag) {
            send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
            sendPostFlag = false;  // Reset the flag
        }
    }
}

/**
 * POST a recorded WAV buffer to the audio API, store the response body as
 * /response.mp3 on LittleFS and start playing it.
 *
 * Playback is started only when the server answered with a 2xx status and
 * the body was written to the file.
 *
 * @param data   Start of the WAV file in memory (header included).
 * @param length Number of bytes to send.
 */
void send_audio_data(uint8_t *data, size_t length) {
  if (WiFi.status() != WL_CONNECTED) {
    Serial.println("Error in WiFi connection");
    return;
  }

  // Begin every request explicitly so the URL and headers are always set,
  // whatever state the previous request left the shared client in.
  // http.begin("https://audio-server.deno.dev/api/audio");
  if (!http.begin("http://192.168.1.137:8000/api/audio")) {
    Serial.println("Failed to initialise HTTP connection");
    return;
  }
  http.addHeader("Content-Type", "audio/wav");
  http.setTimeout(30000); // Set timeout before the request

  Serial.println("Sending audio data...");
  const int httpResponseCode = http.POST(data, length);

  bool mp3Saved = false;
  if (httpResponseCode >= 200 && httpResponseCode < 300) {
    File file = LittleFS.open("/response.mp3", FILE_WRITE);
    if (file) {
      // writeToStream() reports the number of bytes written, or a negative error.
      const int written = http.writeToStream(&file);
      file.close();
      if (written > 0) {
        mp3Saved = true;
        Serial.println("MP3 saved");
      } else {
        Serial.print("Failed to save response body: ");
        Serial.println(written);
      }
    } else {
      Serial.println("Failed to open file for writing");
    }
  } else if (httpResponseCode > 0) {
    // An error page must not be stored and played as if it were audio.
    Serial.print("Server returned HTTP status: ");
    Serial.println(httpResponseCode);
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }

  // Release the request's resources; without this the client never ends.
  http.end();

  // NOTE(review): this runs in the recording task while audio.loop() runs in
  // loop() - confirm the Audio library tolerates that.
  if (mp3Saved) {
    audio.connecttoFS(LittleFS, "/response.mp3");
  }
}

/**
 * Write a 44-byte canonical PCM WAV (RIFF) header to the start of a buffer.
 *
 * The header describes mono PCM audio with SAMPLE_BITS bits per sample; all
 * multi-byte fields are stored little-endian.
 *
 * @param wav_header  Destination; must have room for WAV_HEADER_SIZE bytes.
 *                    A null pointer is ignored.
 * @param wav_size    Size in bytes of the audio payload following the header.
 * @param sample_rate Sampling rate in Hz; written to SampleRate and used to
 *                    derive ByteRate.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
  if (wav_header == nullptr) {
    return;
  }

  const uint16_t num_channels = 1;
  const uint16_t bits_per_sample = SAMPLE_BITS;
  const uint16_t block_align = num_channels * bits_per_sample / 8;
  // RIFF ChunkSize counts everything after the 8-byte "RIFF" + size prefix.
  const uint32_t file_size = wav_size + WAV_HEADER_SIZE - 8;
  // Derived from the argument so SampleRate and ByteRate always agree.
  const uint32_t byte_rate = sample_rate * block_align;

  // Extract one byte of a value; the explicit cast avoids an implicit
  // narrowing conversion inside the braced initializer below.
  auto byte_of = [](uint32_t value, unsigned shift) -> uint8_t {
    return static_cast<uint8_t>((value >> shift) & 0xFFu);
  };

  const uint8_t set_wav_header[] = {
    'R', 'I', 'F', 'F', // ChunkID
    byte_of(file_size, 0), byte_of(file_size, 8), byte_of(file_size, 16), byte_of(file_size, 24), // ChunkSize
    'W', 'A', 'V', 'E', // Format
    'f', 'm', 't', ' ', // Subchunk1ID
    0x10, 0x00, 0x00, 0x00, // Subchunk1Size (16 for PCM)
    0x01, 0x00, // AudioFormat (1 for PCM)
    byte_of(num_channels, 0), byte_of(num_channels, 8), // NumChannels
    byte_of(sample_rate, 0), byte_of(sample_rate, 8), byte_of(sample_rate, 16), byte_of(sample_rate, 24), // SampleRate
    byte_of(byte_rate, 0), byte_of(byte_rate, 8), byte_of(byte_rate, 16), byte_of(byte_rate, 24), // ByteRate
    byte_of(block_align, 0), byte_of(block_align, 8), // BlockAlign
    byte_of(bits_per_sample, 0), byte_of(bits_per_sample, 8), // BitsPerSample
    'd', 'a', 't', 'a', // Subchunk2ID
    byte_of(wav_size, 0), byte_of(wav_size, 8), byte_of(wav_size, 16), byte_of(wav_size, 24), // Subchunk2Size
  };
  static_assert(sizeof(set_wav_header) == WAV_HEADER_SIZE,
                "WAV header layout must match WAV_HEADER_SIZE");
  memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}

Here are the errors I’m getting:

In file included from /Users/simon/Documents/Arduino/XIAO/XIAO.ino:4:
/Users/simon/Documents/Arduino/libraries/ESP32-audioI2S-master/src/Audio.h:480:5: error: 'i2s_config_t' does not name a type; did you mean 'wifi_config_t'?
     i2s_config_t          m_i2s_config = {}; // stores values for I2S driver
     ^~~~~~~~~~~~
     wifi_config_t
/Users/simon/Documents/Arduino/libraries/ESP32-audioI2S-master/src/Audio.h:481:5: error: 'i2s_pin_config_t' does not name a type; did you mean 'eth_phy_config_t'?
     i2s_pin_config_t      m_pin_config = {};
     ^~~~~~~~~~~~~~~~
     eth_phy_config_t
/Users/simon/Documents/Arduino/libraries/ESP32-audioI2S-master/src/Audio.h:166:83: error: 'I2S_NUM_0' was not declared in this scope
     Audio(bool internalDAC = false, uint8_t channelEnabled = 3, uint8_t i2sPort = I2S_NUM_0); // #99
                                                                                   ^~~~~~~~~
/Users/simon/Documents/Arduino/libraries/ESP32-audioI2S-master/src/Audio.h:166:83: note: suggested alternative:
In file included from /Users/simon/Library/Arduino15/packages/esp32/hardware/esp32/2.0.14/libraries/I2S/src/I2S.h:26,
                 from /Users/simon/Documents/Arduino/XIAO/XIAO.ino:1:
/Users/simon/Library/Arduino15/packages/esp32/hardware/esp32/2.0.14/tools/sdk/esp32s3/include/driver/include/driver/i2s.h:34:5: note:   'I2S_NUM_0'
     I2S_NUM_0 = 0,                 /*!< I2S port 0 */
     ^~~~~~~~~
In file included from /Users/simon/Documents/Arduino/XIAO/XIAO.ino:4:
/Users/simon/Documents/Arduino/libraries/ESP32-audioI2S-master/src/Audio.h:511:33: error: 'I2S_NUM_0' was not declared in this scope
     uint8_t         m_i2s_num = I2S_NUM_0;          // I2S_NUM_0 or I2S_NUM_1
                                 ^~~~~~~~~
/Users/simon/Documents/Arduino/libraries/ESP32-audioI2S-master/src/Audio.h:511:33: note: suggested alternative:
In file included from /Users/simon/Library/Arduino15/packages/esp32/hardware/esp32/2.0.14/libraries/I2S/src/I2S.h:26,
                 from /Users/simon/Documents/Arduino/XIAO/XIAO.ino:1:
/Users/simon/Library/Arduino15/packages/esp32/hardware/esp32/2.0.14/tools/sdk/esp32s3/include/driver/include/driver/i2s.h:34:5: note:   'I2S_NUM_0'
     I2S_NUM_0 = 0,                 /*!< I2S port 0 */
     ^~~~~~~~~
Multiple libraries were found for "SD.h"
  Used: /Users/simon/Library/Arduino15/packages/esp32/hardware/esp32/2.0.14/libraries/SD
  Not used: /Users/simon/Library/Arduino15/libraries/SD
exit status 1

Compilation error: exit status 1

Okay, I’m getting closer!
this other post helped me solve the errors caused by the esp_i2s:: namespace, not working with the “Audio.h” library, by using the “driver/i2s.h” library instead.

I’m now having issues with the i2s settings for the speaker and mic overriding each other, I’m either able to record, but not play through speaker, or play response through speaker but get an error saying something along the lines of “RX mode not enabled” while trying to record.

#include <driver/i2s.h>
#include <WiFi.h>
#include <HTTPClient.h>
#include <Audio.h>
#include <LittleFS.h>

// WiFi credentials
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording parameters. SAMPLE_RATE is handed to the I2S driver configuration,
// SAMPLE_BITS feeds the buffer-size and WAV byte-rate calculations, and
// WAV_HEADER_SIZE is the number of bytes reserved at the start of audioBuffer
// for the header that is written once a recording ends.
#define SAMPLE_RATE        8000U
#define SAMPLE_BITS        16
#define MAX_RECORD_TIME    60  // Maximum record time in seconds
#define BUTTON_PIN         4   // Button connected to pin 4
#define WAV_HEADER_SIZE    44

// Output pins passed to audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT) in setup().
#define I2S_DOUT  9
#define I2S_BCLK  8
#define I2S_LRC   7

// Player object from <Audio.h>: serviced from loop() and pointed at the saved
// response file by send_audio_data().
Audio audio;

// Adjust the buffer size to accommodate maximum recording time
// (raw sample bytes for MAX_RECORD_TIME seconds plus the WAV header).
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

HTTPClient http;                          // Client used for the POST in send_audio_data()
uint8_t *audioBuffer = nullptr;           // WAV header + samples; allocated with ps_malloc() in setup()
bool isRecording = false;                 // Last button state accepted by button_isr_handler()
bool sendPostFlag = false;                // Set by record_audio_task() when a finished recording should be sent
unsigned long lastDebounceTime = 0;       // millis() value of the last accepted button change
const unsigned long debounceDelay = 100;  // Minimum millis() gap between accepted button changes
size_t audioBufferIndex = 0;              // Next write offset into audioBuffer
QueueHandle_t xQueue;                     // Carries bool button states from the ISR to record_audio_task()

// Function prototypes
void setup_wifi();
void setup_button();
void setup_i2s();
void IRAM_ATTR button_isr_handler();
void record_audio_task(void *param);
void send_audio_data(uint8_t *data, size_t length);
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
  Serial.begin(115200);
  while (!Serial);

  audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
  if (audioBuffer == nullptr) {
    Serial.println("Failed to allocate memory for audio buffer");
    return;
  }

  setup_wifi();
  setup_button();
  setup_i2s();
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
  audio.setVolume(100);

  if (!LittleFS.begin(true)) {
    Serial.println("An error has occurred while mounting LittleFS");
    return;
  }

  xQueue = xQueueCreate(10, sizeof(bool));
  xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

// Arduino main loop: calls audio.loop() on every pass. Recording and upload
// happen in record_audio_task(), not here.
// NOTE(review): audio.loop() presumably drives decoding/playback of the file
// handed to audio.connecttoFS() — confirm against the Audio library docs.
void loop() {
  audio.loop();
}

// Starts the WiFi connection with the configured credentials and blocks until
// the station reports WL_CONNECTED, printing a progress line every 500 ms.
void setup_wifi() {
  WiFi.begin(ssid, password);

  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.println("Connecting to WiFi...");
  }

  Serial.println("Connected to WiFi");
}

// Configures the push button: input with internal pull-up, with
// button_isr_handler() invoked on every edge (press and release).
void setup_button() {
    const auto buttonInterrupt = digitalPinToInterrupt(BUTTON_PIN);

    pinMode(BUTTON_PIN, INPUT_PULLUP);
    attachInterrupt(buttonInterrupt, button_isr_handler, CHANGE);
}

void setup_i2s() {
  i2s_config_t i2s_config = {
    .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM ),
    .sample_rate          = SAMPLE_RATE,
    .bits_per_sample      = I2S_BITS_PER_SAMPLE_16BIT,
    .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
    .communication_format = I2S_COMM_FORMAT_STAND_I2S,   
    .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
    .dma_buf_count        = 8,
    .dma_buf_len          = 512,    
    .use_apll             = false,
    .tx_desc_auto_clear   = false,
    .fixed_mclk           = 0
  };
  
  i2s_pin_config_t pin_config = {
    .bck_io_num = -1,    // IIS_SCLK
    .ws_io_num = 42,     // IIS_LCLK
    .data_out_num = -1,  // IIS_DSIN
    .data_in_num = 41,   // IIS_DOUT
  };

  i2s_driver_install((i2s_port_t)0, &i2s_config, 0, NULL);
  if (!ESP_OK) {
    Serial.printf("Error in i2s_driver_install");
  }

  i2s_set_pin((i2s_port_t)0, &pin_config);
  if (!ESP_OK) {
    Serial.printf("Error in i2s_set_pin");
  }

  i2s_zero_dma_buffer((i2s_port_t)0);
  if (!ESP_OK) {
    Serial.printf("Error in initializing dma buffer with 0");
  }
}

/**
 * Button edge interrupt. Debounces using millis(), and when the pressed state
 * (pin reads LOW) differs from the last accepted state it records the new
 * state in isRecording and posts it to xQueue for record_audio_task().
 */
void IRAM_ATTR button_isr_handler() {
    // xQueue is a zero-initialised global until setup() assigns it; posting to
    // a null queue handle from an ISR is not valid, so drop edges that arrive
    // before the queue exists.
    if (xQueue == NULL) {
        return;
    }

    const unsigned long now = millis();
    if (now - lastDebounceTime <= debounceDelay) {
        return;  // Still inside the debounce window
    }

    const bool pressed = digitalRead(BUTTON_PIN) == LOW;
    if (pressed == isRecording) {
        return;  // No state change to report
    }

    isRecording = pressed;
    lastDebounceTime = now;
    xQueueSendFromISR(xQueue, &isRecording, NULL);
}

void record_audio_task(void *param) {
    bool shouldRecord = false;
    bool currentlyRecording = false;
    Serial.println("Record audio task started.");

    while (true) {
        // Check for recording state updates
        while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
            if (shouldRecord && !currentlyRecording) {
                currentlyRecording = true;
                Serial.println("Starting recording...");
                audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording, reserving space for WAV header
            } else if (!shouldRecord && currentlyRecording) {
                currentlyRecording = false;
                Serial.println("Stopping recording.");
                // Update WAV header with actual data size and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Set flag to indicate data is ready to be sent
            }
        }

        if (currentlyRecording) {
            size_t bytesRead = 0;
            TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

            // Attempt to read audio data from I2S with minimal blocking
            esp_err_t result = i2s_read((i2s_port_t)0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

            if (result == ESP_OK && bytesRead > 0) {
                audioBufferIndex += bytesRead;
                // Check for buffer overflow
                if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                    currentlyRecording = false;
                    Serial.println("Max recording length reached, stopping recording.");
                    // Update WAV header with actual data size and prepare to send data
                    generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                    sendPostFlag = true;  // Set flag to indicate data is ready to be sent
                }
            }

            // Immediately check the queue again to see if recording should stop
            if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
                currentlyRecording = false;
                Serial.println("Stopping recording via queue message.");
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Prepare to send data
            }

            // Use a short delay to yield to other tasks
            vTaskDelay(1 / portTICK_PERIOD_MS);
        } else {
            // If not recording, check less frequently
            vTaskDelay(10 / portTICK_PERIOD_MS);
        }

        // Check if the audio data is ready to be sent
        if (sendPostFlag) {
            send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
            sendPostFlag = false;  // Reset the flag
        }
    }
}

/**
 * POSTs a finished WAV recording to the audio API, stores the response body as
 * /response.mp3 on LittleFS and starts playback of that file.
 *
 * Playback is only started when a 2xx response was received and written to the
 * file, so a failed request never replays a stale or missing file.
 *
 * @param data    Start of the WAV data (header followed by samples).
 * @param length  Number of bytes to send.
 *
 * NOTE(review): this runs in record_audio_task() while loop() calls
 * audio.loop() from another task — confirm the Audio library tolerates
 * connecttoFS() being called concurrently.
 */
void send_audio_data(uint8_t *data, size_t length) {
  if (WiFi.status() != WL_CONNECTED) {
    Serial.println("WiFi not connected, audio data not sent");
    return;
  }

  // Set up the request on every call and release it with end() below, so the
  // URL and Content-Type header are always present regardless of whether the
  // previous connection is still open.
  // http.begin("https://audio-server.deno.dev/api/audio");
  http.begin("http://192.168.1.137:8000/api/audio");
  http.addHeader("Content-Type", "audio/wav");
  http.setTimeout(30000); // Set timeout before the request

  Serial.println("Sending audio data...");
  const int httpResponseCode = http.POST(data, length);

  bool responseSaved = false;
  // Negative codes are client-side errors; non-2xx statuses carry an error
  // body rather than audio, so neither is stored as MP3.
  if (httpResponseCode >= 200 && httpResponseCode < 300) {
    File file = LittleFS.open("/response.mp3", FILE_WRITE);
    if (file) {
      const int written = http.writeToStream(&file);
      file.close();
      if (written >= 0) {
        Serial.println("MP3 saved");
        responseSaved = true;
      } else {
        Serial.print("Error while saving response: ");
        Serial.println(written);
      }
    } else {
      Serial.println("Failed to open file for writing");
    }
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }

  http.end();

  if (responseSaved) {
    audio.connecttoFS(LittleFS, "/response.mp3");
  }
}

/**
 * Writes a 44-byte canonical RIFF/WAVE header for mono 16-bit PCM audio.
 *
 * @param wav_header   Destination; must have room for 44 bytes. A null pointer
 *                     is ignored.
 * @param wav_size     Size of the sample data that follows the header, in bytes.
 * @param sample_rate  Sample rate in Hz. Both the SampleRate and ByteRate
 *                     fields are derived from this value so they always agree.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
  if (wav_header == nullptr) {
    return;
  }

  // The header describes a fixed format: one channel of 16-bit PCM.
  const uint32_t header_size = 44;
  const uint16_t num_channels = 1;
  const uint16_t bits_per_sample = 16;
  const uint16_t block_align = num_channels * bits_per_sample / 8;

  const uint32_t file_size = wav_size + header_size - 8;  // RIFF chunk size excludes "RIFF" + size field
  const uint32_t byte_rate = sample_rate * block_align;

  // Extracts byte `index` (0 = least significant) of a little-endian field.
  // The explicit cast avoids a narrowing conversion inside the brace list.
  auto byte_of = [](uint32_t value, unsigned index) -> uint8_t {
    return static_cast<uint8_t>((value >> (8 * index)) & 0xFF);
  };

  const uint8_t set_wav_header[] = {
    'R', 'I', 'F', 'F', // ChunkID
    byte_of(file_size, 0), byte_of(file_size, 1), byte_of(file_size, 2), byte_of(file_size, 3), // ChunkSize
    'W', 'A', 'V', 'E', // Format
    'f', 'm', 't', ' ', // Subchunk1ID
    0x10, 0x00, 0x00, 0x00, // Subchunk1Size (16 for PCM)
    0x01, 0x00, // AudioFormat (1 for PCM)
    byte_of(num_channels, 0), byte_of(num_channels, 1), // NumChannels
    byte_of(sample_rate, 0), byte_of(sample_rate, 1), byte_of(sample_rate, 2), byte_of(sample_rate, 3), // SampleRate
    byte_of(byte_rate, 0), byte_of(byte_rate, 1), byte_of(byte_rate, 2), byte_of(byte_rate, 3), // ByteRate
    byte_of(block_align, 0), byte_of(block_align, 1), // BlockAlign
    byte_of(bits_per_sample, 0), byte_of(bits_per_sample, 1), // BitsPerSample
    'd', 'a', 't', 'a', // Subchunk2ID
    byte_of(wav_size, 0), byte_of(wav_size, 1), byte_of(wav_size, 2), byte_of(wav_size, 3), // Subchunk2Size
  };
  static_assert(sizeof(set_wav_header) == 44, "WAV header must be exactly 44 bytes");
  memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}

Nice, going… working through
I looked at this thread and was able to help another user get his working with a different amp board or dac.

maybe it will show the way?
HTH
GL :slight_smile: PJ :+1:

and here,

I got it working now! Switching between TX and RX setups solved the issue, although with some static crackle during the switch. Still would like to be able to “interrupt” the speech, and record over but currently that only leads to errors. The code is not pretty, but it works, so I’ll mark this as solved!

#include <driver/i2s.h>
#include <WiFi.h>
#include <HTTPClient.h>
#include <Audio.h>
#include <LittleFS.h>

// WiFi credentials
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording parameters. SAMPLE_RATE is handed to the I2S RX configuration,
// SAMPLE_BITS feeds the buffer-size and WAV byte-rate calculations, and
// WAV_HEADER_SIZE is the number of bytes reserved at the start of audioBuffer
// for the header that is written once a recording ends.
#define SAMPLE_RATE        8000U
#define SAMPLE_BITS        16
#define MAX_RECORD_TIME    60  // Maximum record time in seconds
#define BUTTON_PIN         4   // Button connected to pin 4
#define WAV_HEADER_SIZE    44

// Speaker-side I2S pins, used by audio.setPinout() and the TX pin configuration.
#define I2S_DOUT  9
#define I2S_BCLK  8
#define I2S_LRC   7

// Player object from <Audio.h>: serviced from loop() and pointed at the saved
// response file by send_audio_data().
Audio audio;

// Adjust the buffer size to accommodate maximum recording time
// (raw sample bytes for MAX_RECORD_TIME seconds plus the WAV header).
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

HTTPClient http;                          // Client used for the POST in send_audio_data()
uint8_t *audioBuffer = nullptr;           // WAV header + samples; allocated with ps_malloc() in setup()
bool isRecording = false;                 // Last button state accepted by button_isr_handler()
bool sendPostFlag = false;                // Set by record_audio_task() when a finished recording should be sent
// Written by button_isr_handler() and polled by record_audio_task(); volatile
// so the task always re-reads the value the interrupt stored.
volatile bool requestSwitchToRxMode = false;
unsigned long lastDebounceTime = 0;       // millis() value of the last accepted button change
const unsigned long debounceDelay = 100;  // Minimum millis() gap between accepted button changes
size_t audioBufferIndex = 0;              // Next write offset into audioBuffer
QueueHandle_t xQueue;                     // Carries bool button states from the ISR to record_audio_task()

// Function prototypes
void setup_wifi();
void setup_button();
void setup_i2s(i2s_mode_t mode);
void switch_i2s_mode(i2s_mode_t mode);
void IRAM_ATTR button_isr_handler();
void record_audio_task(void *param);
void send_audio_data(uint8_t *data, size_t length);
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
  Serial.begin(115200);
  while (!Serial);

  audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
  if (audioBuffer == nullptr) {
    Serial.println("Failed to allocate memory for audio buffer");
    return;
  }

  setup_wifi();
  setup_button();
  setup_i2s(I2S_MODE_RX);
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
  audio.setVolume(100);

  if (!LittleFS.begin(true)) {
    Serial.println("An error has occurred while mounting LittleFS");
    return;
  }

  xQueue = xQueueCreate(10, sizeof(bool));
  xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

// Arduino main loop: calls audio.loop() on every pass. Recording, upload and
// I2S mode switching happen in record_audio_task(), not here.
// NOTE(review): audio.loop() presumably drives decoding/playback of the file
// handed to audio.connecttoFS() — confirm against the Audio library docs.
void loop() {
  audio.loop();
}

// Starts the WiFi connection with the configured credentials and blocks until
// the station reports WL_CONNECTED, printing a progress line every 500 ms.
void setup_wifi() {
  WiFi.begin(ssid, password);

  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.println("Connecting to WiFi...");
  }

  Serial.println("Connected to WiFi");
}

// Configures the push button: input with internal pull-up, with
// button_isr_handler() invoked on every edge (press and release).
void setup_button() {
    const auto buttonInterrupt = digitalPinToInterrupt(BUTTON_PIN);

    pinMode(BUTTON_PIN, INPUT_PULLUP);
    attachInterrupt(buttonInterrupt, button_isr_handler, CHANGE);
}

/**
 * (Re)installs the I2S driver on port 0 for either playback or recording.
 *
 * @param mode  I2S_MODE_TX selects the speaker configuration (16 kHz, 32-bit,
 *              BCLK/LRC/DOUT pins); I2S_MODE_RX selects the PDM microphone
 *              configuration (SAMPLE_RATE, 16-bit, pins 42/41). If both bits
 *              are set, TX wins. A mode with neither bit is rejected, because
 *              there is no configuration to install for it.
 *
 * Any driver already installed on port 0 is removed first. Install and pin
 * routing errors are logged and stop the sequence.
 */
void setup_i2s(i2s_mode_t mode) {
    const bool wantsTx = (mode & I2S_MODE_TX) != 0;
    const bool wantsRx = (mode & I2S_MODE_RX) != 0;
    if (!wantsTx && !wantsRx) {
        Serial.println("setup_i2s: mode must include I2S_MODE_TX or I2S_MODE_RX");
        return;
    }

    i2s_config_t i2s_config;
    i2s_pin_config_t pin_config;

    if (wantsTx) {
        // Speaker configuration
        i2s_config  = {
            .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | mode),
            .sample_rate          = 16000U,
            .bits_per_sample      = I2S_BITS_PER_SAMPLE_32BIT,
            .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
            .communication_format = I2S_COMM_FORMAT_STAND_I2S,
            .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
            .dma_buf_count        = 8,
            .dma_buf_len          = 512,
            .use_apll             = false,
            .tx_desc_auto_clear   = false,  // Only applicable in TX mode
            .fixed_mclk           = 0
        };
        pin_config = {
            .bck_io_num = I2S_BCLK,
            .ws_io_num = I2S_LRC,
            .data_out_num = I2S_DOUT,
            .data_in_num = -1  // Not used
        };
    } else {
        // Microphone configuration
        i2s_config  = {
            .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_PDM | mode),
            .sample_rate          = SAMPLE_RATE,
            .bits_per_sample      = I2S_BITS_PER_SAMPLE_16BIT,
            .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
            .communication_format = I2S_COMM_FORMAT_STAND_I2S,
            .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
            .dma_buf_count        = 8,
            .dma_buf_len          = 512,
            .use_apll             = false,
            .tx_desc_auto_clear   = false,  // Only applicable in TX mode
            .fixed_mclk           = 0
        };
        pin_config = {
            .bck_io_num = -1,  // Not used
            .ws_io_num = 42,  // IIS_LCLK for microphone
            .data_out_num = -1,  // Not used
            .data_in_num = 41   // IIS_DOUT for microphone
        };
    }

    // Uninstall the existing driver before setting a new configuration. The
    // result is deliberately ignored: the call is made even when no driver may
    // be installed yet.
    (void)i2s_driver_uninstall((i2s_port_t)0);

    esp_err_t err = i2s_driver_install((i2s_port_t)0, &i2s_config, 0, NULL);
    if (err != ESP_OK) {
        Serial.printf("i2s_driver_install failed: %d\n", (int)err);
        return;
    }

    err = i2s_set_pin((i2s_port_t)0, &pin_config);
    if (err != ESP_OK) {
        Serial.printf("i2s_set_pin failed: %d\n", (int)err);
        return;
    }

    err = i2s_zero_dma_buffer((i2s_port_t)0);
    if (err != ESP_OK) {
        Serial.printf("i2s_zero_dma_buffer failed: %d\n", (int)err);
    }
}


/**
 * Button edge interrupt. Debounces using millis(), and when the pressed state
 * (pin reads LOW) differs from the last accepted state it records the new
 * state in isRecording, asks the task to switch I2S to RX on a press, and
 * posts the state to xQueue for record_audio_task().
 */
void IRAM_ATTR button_isr_handler() {
    // xQueue is a zero-initialised global until setup() assigns it; posting to
    // a null queue handle from an ISR is not valid, so drop edges that arrive
    // before the queue exists.
    if (xQueue == NULL) {
        return;
    }

    const unsigned long now = millis();
    if (now - lastDebounceTime <= debounceDelay) {
        return;  // Still inside the debounce window
    }

    const bool pressed = digitalRead(BUTTON_PIN) == LOW;
    if (pressed == isRecording) {
        return;  // No state change to report
    }

    isRecording = pressed;
    lastDebounceTime = now;

    if (isRecording) {
        // The driver reinstall is too heavy for an ISR; only flag it here and
        // let record_audio_task() perform the switch.
        requestSwitchToRxMode = true;
    }
    xQueueSendFromISR(xQueue, &isRecording, NULL);
}

void record_audio_task(void *param) {
    bool shouldRecord = false;
    bool currentlyRecording = false;
    Serial.println("Record audio task started.");

    while (true) {
        // Handle mode switching request outside ISR
        if (requestSwitchToRxMode) {
            setup_i2s(I2S_MODE_RX); // Perform the mode switching
            requestSwitchToRxMode = false; // Reset the request flag
        }

        // Check for recording state updates
        while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
            if (shouldRecord && !currentlyRecording) {
                currentlyRecording = true;
                Serial.println("Starting recording...");
                audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording
            } else if (!shouldRecord && currentlyRecording) {
                currentlyRecording = false;
                Serial.println("Stopping recording.");
                setup_i2s(I2S_MODE_TX); // Switch back to TX mode after recording stops

                // Update WAV header and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;
            }
        }

        if (currentlyRecording) {
            size_t bytesRead = 0;
            TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

            // Attempt to read audio data from I2S with minimal blocking
            esp_err_t result = i2s_read((i2s_port_t)0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

            if (result == ESP_OK && bytesRead > 0) {
                audioBufferIndex += bytesRead;
                // Check for buffer overflow
                if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                    currentlyRecording = false;
                    Serial.println("Max recording length reached, stopping recording.");
                    // Update WAV header with actual data siz e and prepare to send data
                    generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                    sendPostFlag = true;  // Set flag to indicate data is ready to be sent
                }
            }

            // Immediately check the queue again to see if recording should stop
            if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
                currentlyRecording = false;
                Serial.println("Stopping recording via queue message.");
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Prepare to send data
            }

            // Use a short delay to yield to other tasks
            vTaskDelay(1 / portTICK_PERIOD_MS);
        } else {
            // If not recording, check less frequently
            vTaskDelay(10 / portTICK_PERIOD_MS);
        }

        // Check if the audio data is ready to be sent
        if (sendPostFlag) {
            send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
            sendPostFlag = false;  // Reset the flag
        }
    }
}

/**
 * POSTs a finished WAV recording to the audio API, switches I2S to TX mode,
 * stores the response body as /response.mp3 on LittleFS and starts playback of
 * that file.
 *
 * Playback is only started when a 2xx response was received and written to the
 * file, so a failed request never replays a stale or missing file.
 *
 * @param data    Start of the WAV data (header followed by samples).
 * @param length  Number of bytes to send.
 *
 * NOTE(review): this runs in record_audio_task() while loop() calls
 * audio.loop() from another task — confirm the Audio library tolerates
 * connecttoFS() being called concurrently.
 */
void send_audio_data(uint8_t *data, size_t length) {
  if (WiFi.status() != WL_CONNECTED) {
    Serial.println("WiFi not connected, audio data not sent");
    return;
  }

  // Set up the request on every call and release it with end() below, so the
  // URL and Content-Type header are always present regardless of whether the
  // previous connection is still open.
  // http.begin("https://audio-server.deno.dev/api/audio");
  http.begin("http://192.168.1.137:8000/api/audio");
  http.addHeader("Content-Type", "audio/wav");
  http.setTimeout(30000); // Set timeout before the request

  Serial.println("Sending audio data...");
  const int httpResponseCode = http.POST(data, length);

  // Recording is over at this point; put I2S into playback mode whether or
  // not the request succeeded.
  setup_i2s(I2S_MODE_TX);

  bool responseSaved = false;
  // Negative codes are client-side errors; non-2xx statuses carry an error
  // body rather than audio, so neither is stored as MP3.
  if (httpResponseCode >= 200 && httpResponseCode < 300) {
    // Start from a clean file so leftovers from a previous response cannot
    // be played back.
    if (LittleFS.exists("/response.mp3")) {
      LittleFS.remove("/response.mp3");
    }

    File file = LittleFS.open("/response.mp3", FILE_WRITE);
    if (file) {
      const int written = http.writeToStream(&file);
      file.close();
      if (written >= 0) {
        Serial.println("MP3 saved");
        responseSaved = true;
      } else {
        Serial.print("Error while saving response: ");
        Serial.println(written);
      }
    } else {
      Serial.println("Failed to open /response.mp3 for writing.");
    }
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }

  http.end();

  if (responseSaved) {
    audio.connecttoFS(LittleFS, "/response.mp3");
  }
}

/**
 * Writes a 44-byte canonical RIFF/WAVE header for mono 16-bit PCM audio.
 *
 * @param wav_header   Destination; must have room for 44 bytes. A null pointer
 *                     is ignored.
 * @param wav_size     Size of the sample data that follows the header, in bytes.
 * @param sample_rate  Sample rate in Hz. Both the SampleRate and ByteRate
 *                     fields are derived from this value so they always agree.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
  if (wav_header == nullptr) {
    return;
  }

  // The header describes a fixed format: one channel of 16-bit PCM.
  const uint32_t header_size = 44;
  const uint16_t num_channels = 1;
  const uint16_t bits_per_sample = 16;
  const uint16_t block_align = num_channels * bits_per_sample / 8;

  const uint32_t file_size = wav_size + header_size - 8;  // RIFF chunk size excludes "RIFF" + size field
  const uint32_t byte_rate = sample_rate * block_align;

  // Extracts byte `index` (0 = least significant) of a little-endian field.
  // The explicit cast avoids a narrowing conversion inside the brace list.
  auto byte_of = [](uint32_t value, unsigned index) -> uint8_t {
    return static_cast<uint8_t>((value >> (8 * index)) & 0xFF);
  };

  const uint8_t set_wav_header[] = {
    'R', 'I', 'F', 'F', // ChunkID
    byte_of(file_size, 0), byte_of(file_size, 1), byte_of(file_size, 2), byte_of(file_size, 3), // ChunkSize
    'W', 'A', 'V', 'E', // Format
    'f', 'm', 't', ' ', // Subchunk1ID
    0x10, 0x00, 0x00, 0x00, // Subchunk1Size (16 for PCM)
    0x01, 0x00, // AudioFormat (1 for PCM)
    byte_of(num_channels, 0), byte_of(num_channels, 1), // NumChannels
    byte_of(sample_rate, 0), byte_of(sample_rate, 1), byte_of(sample_rate, 2), byte_of(sample_rate, 3), // SampleRate
    byte_of(byte_rate, 0), byte_of(byte_rate, 1), byte_of(byte_rate, 2), byte_of(byte_rate, 3), // ByteRate
    byte_of(block_align, 0), byte_of(block_align, 1), // BlockAlign
    byte_of(bits_per_sample, 0), byte_of(bits_per_sample, 1), // BitsPerSample
    'd', 'a', 't', 'a', // Subchunk2ID
    byte_of(wav_size, 0), byte_of(wav_size, 1), byte_of(wav_size, 2), byte_of(wav_size, 3), // Subchunk2Size
  };
  static_assert(sizeof(set_wav_header) == 44, "WAV header must be exactly 44 bytes");
  memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}

The latency is still quite bad, and I hope to find a good TTS service that supports the languages I need and streams audio in the correct format.

1 Like

Update: I realised I can use one of the I2S interfaces for RX (recording) and the other for TX (playback), removing the need to switch I2S modes.