Streaming audio chunks from an API to a XIAO ESP32S3 with a MAX98357 amp

Okay, I’m getting closer!
This other post helped me resolve the errors caused by the esp_i2s:: namespace not working with the “Audio.h” library: I now use the “driver/i2s.h” library instead.

I’m now having issues with the I2S settings for the speaker and the mic overriding each other. I can either record but not play through the speaker, or play the response through the speaker but then get an error along the lines of “RX mode not enabled” when I try to record.

#include <driver/i2s.h>
#include <WiFi.h>
#include <HTTPClient.h>
#include <Audio.h>
#include <LittleFS.h>

// WiFi credentials (placeholders; passed to WiFi.begin() in setup_wifi())
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording format. SAMPLE_RATE is used for the I2S microphone configuration
// and for the WAV header; SAMPLE_BITS is used for buffer sizing and the WAV
// byte rate.
#define SAMPLE_RATE        8000U
#define SAMPLE_BITS        16
#define MAX_RECORD_TIME    60  // Maximum record time in seconds
#define BUTTON_PIN         4   // Button connected to pin 4
// Number of bytes reserved at the start of audioBuffer for the WAV header.
#define WAV_HEADER_SIZE    44

// Speaker amplifier pins, handed to audio.setPinout() in setup().
#define I2S_DOUT  9
#define I2S_BCLK  8
#define I2S_LRC   7

// Playback object; serviced from loop() and started from send_audio_data().
// NOTE(review): setup_i2s() installs the legacy I2S driver on port 0 for the
// microphone. If this Audio library also defaults to I2S port 0, the two
// drivers will reconfigure the same peripheral, which would match the
// "RX mode not enabled" symptom -- confirm the library's constructor
// arguments and consider moving playback to the other I2S port.
Audio audio;

// Adjust the buffer size to accommodate maximum recording time
// (bytes per second * seconds + header; 960044 bytes with the values above).
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

// Shared HTTP client used by send_audio_data().
HTTPClient http;
// Recording buffer (WAV header + PCM data), allocated with ps_malloc() in setup().
uint8_t *audioBuffer = nullptr;
// Last button state accepted by button_isr_handler() (true = pressed / record).
bool isRecording = false;
// Set by record_audio_task() when a finished recording is waiting to be uploaded.
bool sendPostFlag = false;
// millis() timestamp of the last accepted button edge, and the minimum gap
// (in milliseconds) required before another edge is accepted.
unsigned long lastDebounceTime = 0;
const unsigned long debounceDelay = 100;
// Write position in audioBuffer; also the total number of bytes to upload.
size_t audioBufferIndex = 0;
// Queue of bool record/stop requests sent by the button ISR to record_audio_task().
QueueHandle_t xQueue;

// Function prototypes
// Connects to the access point given by ssid/password; blocks until connected.
void setup_wifi();
// Configures BUTTON_PIN with a pull-up and attaches button_isr_handler to it.
void setup_button();
// Installs and configures the I2S driver used for the PDM microphone.
void setup_i2s();
// Button edge interrupt: debounces and queues record/stop requests on xQueue.
void IRAM_ATTR button_isr_handler();
// FreeRTOS task: records from I2S into audioBuffer and uploads finished clips.
void record_audio_task(void *param);
// POSTs `length` bytes starting at `data` to the server and plays the reply.
void send_audio_data(uint8_t *data, size_t length);
// Writes a 44-byte PCM WAV header for `wav_size` data bytes into `wav_header`.
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
  Serial.begin(115200);
  while (!Serial);

  audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
  if (audioBuffer == nullptr) {
    Serial.println("Failed to allocate memory for audio buffer");
    return;
  }

  setup_wifi();
  setup_button();
  setup_i2s();
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
  audio.setVolume(100);

  if (!LittleFS.begin(true)) {
    Serial.println("An error has occurred while mounting LittleFS");
    return;
  }

  xQueue = xQueueCreate(10, sizeof(bool));
  xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

// Arduino main loop: only services the audio player. Recording and uploading
// run in the separate FreeRTOS task created in setup().
void loop() {
  // Must be called continuously so the Audio object can keep playing.
  audio.loop();
}

// Joins the network described by `ssid` / `password` and blocks until the
// station reports WL_CONNECTED, printing a progress line every 500 ms.
void setup_wifi() {
  WiFi.begin(ssid, password);

  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.println("Connecting to WiFi...");
  }

  Serial.println("Connected to WiFi");
}

// Prepares the record button: input with internal pull-up (so a press reads
// LOW) and an interrupt on both edges handled by button_isr_handler().
void setup_button() {
    pinMode(BUTTON_PIN, INPUT_PULLUP);

    const auto buttonInterrupt = digitalPinToInterrupt(BUTTON_PIN);
    attachInterrupt(buttonInterrupt, button_isr_handler, CHANGE);
}

void setup_i2s() {
  i2s_config_t i2s_config = {
    .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM ),
    .sample_rate          = SAMPLE_RATE,
    .bits_per_sample      = I2S_BITS_PER_SAMPLE_16BIT,
    .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
    .communication_format = I2S_COMM_FORMAT_STAND_I2S,   
    .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
    .dma_buf_count        = 8,
    .dma_buf_len          = 512,    
    .use_apll             = false,
    .tx_desc_auto_clear   = false,
    .fixed_mclk           = 0
  };
  
  i2s_pin_config_t pin_config = {
    .bck_io_num = -1,    // IIS_SCLK
    .ws_io_num = 42,     // IIS_LCLK
    .data_out_num = -1,  // IIS_DSIN
    .data_in_num = 41,   // IIS_DOUT
  };

  i2s_driver_install((i2s_port_t)0, &i2s_config, 0, NULL);
  if (!ESP_OK) {
    Serial.printf("Error in i2s_driver_install");
  }

  i2s_set_pin((i2s_port_t)0, &pin_config);
  if (!ESP_OK) {
    Serial.printf("Error in i2s_set_pin");
  }

  i2s_zero_dma_buffer((i2s_port_t)0);
  if (!ESP_OK) {
    Serial.printf("Error in initializing dma buffer with 0");
  }
}

/**
 * Interrupt handler for both edges of BUTTON_PIN.
 *
 * Ignores edges that arrive within `debounceDelay` ms of the last accepted
 * one. Otherwise, if the pin level (LOW = pressed) differs from
 * `isRecording`, it stores the new state and posts it to `xQueue` for
 * record_audio_task().
 *
 * Note: because rejected edges are simply dropped, a release that arrives
 * within the debounce window of the press is not seen, and the recording
 * state stays set until the next accepted edge.
 */
void IRAM_ATTR button_isr_handler() {
    // The interrupt may fire before the queue has been created; sending to a
    // null queue handle is invalid, so drop the event instead.
    if (xQueue == nullptr) {
        return;
    }

    const unsigned long interruptTime = millis();
    if (interruptTime - lastDebounceTime <= debounceDelay) {
        return;  // still inside the debounce window
    }

    const bool pressed = (digitalRead(BUTTON_PIN) == LOW);
    if (pressed == isRecording) {
        return;  // no state change to report
    }

    isRecording = pressed;
    lastDebounceTime = interruptTime;
    xQueueSendFromISR(xQueue, &isRecording, NULL);
}

void record_audio_task(void *param) {
    bool shouldRecord = false;
    bool currentlyRecording = false;
    Serial.println("Record audio task started.");

    while (true) {
        // Check for recording state updates
        while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
            if (shouldRecord && !currentlyRecording) {
                currentlyRecording = true;
                Serial.println("Starting recording...");
                audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording, reserving space for WAV header
            } else if (!shouldRecord && currentlyRecording) {
                currentlyRecording = false;
                Serial.println("Stopping recording.");
                // Update WAV header with actual data size and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Set flag to indicate data is ready to be sent
            }
        }

        if (currentlyRecording) {
            size_t bytesRead = 0;
            TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

            // Attempt to read audio data from I2S with minimal blocking
            esp_err_t result = i2s_read((i2s_port_t)0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

            if (result == ESP_OK && bytesRead > 0) {
                audioBufferIndex += bytesRead;
                // Check for buffer overflow
                if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                    currentlyRecording = false;
                    Serial.println("Max recording length reached, stopping recording.");
                    // Update WAV header with actual data size and prepare to send data
                    generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                    sendPostFlag = true;  // Set flag to indicate data is ready to be sent
                }
            }

            // Immediately check the queue again to see if recording should stop
            if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
                currentlyRecording = false;
                Serial.println("Stopping recording via queue message.");
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Prepare to send data
            }

            // Use a short delay to yield to other tasks
            vTaskDelay(1 / portTICK_PERIOD_MS);
        } else {
            // If not recording, check less frequently
            vTaskDelay(10 / portTICK_PERIOD_MS);
        }

        // Check if the audio data is ready to be sent
        if (sendPostFlag) {
            send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
            sendPostFlag = false;  // Reset the flag
        }
    }
}

void send_audio_data(uint8_t *data, size_t length) {
  if (WiFi.status() == WL_CONNECTED) {
    if (!http.connected()) { // Only begin a new connection if not already connected
      // http.begin("https://audio-server.deno.dev/api/audio");
      http.begin("http://192.168.1.137:8000/api/audio");
      http.addHeader("Content-Type", "audio/wav");
    }

    http.setTimeout(30000); // Set timeout before the request
    Serial.println("Sending audio data...");
    int httpResponseCode = http.POST(data, length);

    if (httpResponseCode > 0) {
    File file = LittleFS.open("/response.mp3", FILE_WRITE);
    if (file) {
      http.writeToStream(&file);
      file.close();
      Serial.println("MP3 saved");
    } else {
      Serial.println("Failed to open file for writing");
    }
  } else {
    Serial.print("Error on sending POST: ");
    Serial.println(httpResponseCode);
  }
    audio.connecttoFS(LittleFS, "/response.mp3");
  }
}

/**
 * Writes a canonical 44-byte RIFF/WAVE header for 16-bit mono PCM audio.
 *
 * @param wav_header  destination buffer; must have room for 44 bytes (the
 *                    same size the sketch reserves via WAV_HEADER_SIZE).
 *                    Nothing is written when it is null.
 * @param wav_size    number of PCM data bytes that follow the header.
 * @param sample_rate sampling rate in Hz; also determines the byte rate
 *                    field (previously the byte rate ignored this parameter
 *                    and always used the global SAMPLE_RATE).
 *
 * All multi-byte fields are stored little-endian, as the format requires.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
  if (wav_header == nullptr) {
    return;
  }

  // Format constants for the fixed 16-bit mono layout this header describes.
  constexpr uint16_t kNumChannels   = 1;
  constexpr uint16_t kBitsPerSample = 16;
  constexpr uint16_t kBlockAlign    = kNumChannels * kBitsPerSample / 8;
  constexpr uint32_t kHeaderBytes   = 44;

  // RIFF chunk size counts everything after the 8-byte "RIFF"+size prefix.
  const uint32_t file_size = wav_size + kHeaderBytes - 8;
  const uint32_t byte_rate = sample_rate * kBlockAlign;

  // Extracts byte `index` (0 = least significant) of a 32-bit value; the
  // explicit cast avoids narrowing conversions inside the braced initialiser.
  const auto byte_of = [](uint32_t value, unsigned index) -> uint8_t {
    return static_cast<uint8_t>((value >> (8U * index)) & 0xFFU);
  };

  const uint8_t header[] = {
    'R', 'I', 'F', 'F',                                                                   // ChunkID
    byte_of(file_size, 0), byte_of(file_size, 1), byte_of(file_size, 2), byte_of(file_size, 3), // ChunkSize
    'W', 'A', 'V', 'E',                                                                   // Format
    'f', 'm', 't', ' ',                                                                   // Subchunk1ID
    0x10, 0x00, 0x00, 0x00,                                                               // Subchunk1Size (16 for PCM)
    0x01, 0x00,                                                                           // AudioFormat (1 for PCM)
    byte_of(kNumChannels, 0), byte_of(kNumChannels, 1),                                   // NumChannels
    byte_of(sample_rate, 0), byte_of(sample_rate, 1), byte_of(sample_rate, 2), byte_of(sample_rate, 3), // SampleRate
    byte_of(byte_rate, 0), byte_of(byte_rate, 1), byte_of(byte_rate, 2), byte_of(byte_rate, 3),         // ByteRate
    byte_of(kBlockAlign, 0), byte_of(kBlockAlign, 1),                                     // BlockAlign
    byte_of(kBitsPerSample, 0), byte_of(kBitsPerSample, 1),                               // BitsPerSample
    'd', 'a', 't', 'a',                                                                   // Subchunk2ID
    byte_of(wav_size, 0), byte_of(wav_size, 1), byte_of(wav_size, 2), byte_of(wav_size, 3),             // Subchunk2Size
  };
  static_assert(sizeof(header) == kHeaderBytes, "WAV header must be exactly 44 bytes");

  memcpy(wav_header, header, sizeof(header));
}