#include <Arduino.h>
#include <WiFi.h>
#include <WiFiClientSecure.h>
#include <HTTPClient.h>
#include <SPI.h>
#include <SD.h>
#include <ArduinoJson.h>
#include <ESP_I2S.h>
#include "WitAITTS.h"
// CHANGE THESE: replace the placeholder values below with your own settings.
// Credential placeholders; they initialise wifiSsid, wifiPassword, groqApiKey
// and witToken further down.
#define WIFI_SSID_PLACEHOLDER "SSID_PLACEHOLDER"
#define WIFI_PASSWORD_PLACEHOLDER "PASSWORD_PLACEHOLDER"
#define GROQ_API_KEY_PLACEHOLDER "GROQ_API_KEY_PLACEHOLDER"
#define WIT_TOKEN_PLACEHOLDER "WIT_TOKEN_PLACEHOLDER"
// Chip-select pin handed to SD.begin() in setup().
#define SD_CS_PIN 21
// Pins handed to pdmRecorder.setPinsPdmRx() in recorderTask().
#define PDM_CLOCK_PIN 42
#define PDM_DATA_PIN 41
// Pins handed to the WitAITTS constructor (order there: BCLK, WS, DOUT).
#define I2S_DOUT_PIN 4
#define I2S_BCLK_PIN 5
#define I2S_WS_PIN 6
// Sample rate used both for the PDM capture and for the WAV header.
#define SAMPLE_RATE_HZ 16000
// Host and path of the transcription request built in streamFileToWhisper().
const char* WHISPER_HOST = "api.groq.com";
const char* WHISPER_PATH = "/openai/v1/audio/transcriptions";
// Full URL of the chat-completion request issued by queryGroqChat().
const char* GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
;
// I2S peripheral object used for microphone capture in recorderTask().
I2SClass pdmRecorder;
// Text-to-speech engine; driven from setup(), loop() and handleAiReply().
WitAITTS ttsEngine(I2S_BCLK_PIN, I2S_WS_PIN, I2S_DOUT_PIN);
// Flags polled by recorderTask(): the first starts a recording, the second
// ends the one in progress. recorderTask() only clears them; whatever sets
// them to true is not visible in this part of the file.
volatile bool recordingRequested = false;
volatile bool recordingStopRequested = false;
// Runtime copies of the credentials defined above.
const char* wifiSsid = WIFI_SSID_PLACEHOLDER;
const char* wifiPassword = WIFI_PASSWORD_PLACEHOLDER;
const char* groqApiKey = GROQ_API_KEY_PLACEHOLDER;
const char* witToken = WIT_TOKEN_PLACEHOLDER;
// Task handles filled in by xTaskCreatePinnedToCore() in setup();
// networkTaskHandle is the notification target used by recorderTask().
TaskHandle_t recorderTaskHandle = nullptr;
TaskHandle_t networkTaskHandle = nullptr;
// SD-card path of the recording written by recorderTask() and uploaded by
// streamFileToWhisper().
const char* WAV_FILE_PATH = "/r.wav";
// Thin wrapper around millis() so timing code reads uniformly.
inline unsigned long nowMs() {
  return millis();
}
// Emits one debug line (followed by a newline) on the Serial port.
void dbgPrint(const String &s) {
  Serial.println(s);
}
/**
 * Writes a 44-byte PCM WAV header at the start of an open file.
 *
 * The header describes uncompressed (format 1), mono, 16-bit audio.
 * Multi-byte fields are written straight from memory, i.e. in the byte
 * order of the CPU running this code.
 *
 * @param fileHandle  Open file; it is rewound to offset 0 before writing.
 * @param sampleRate  Sample rate in Hz stored in the header.
 * @param dataBytes   Size of the audio payload that follows the header.
 */
void writeWavHeader(File &fileHandle, uint32_t sampleRate, uint32_t dataBytes) {
  // Small local writers so the field list below reads like the header layout.
  auto putTag = [&fileHandle](const char *fourcc) {
    fileHandle.write((const uint8_t*)fourcc, 4);
  };
  auto putU32 = [&fileHandle](uint32_t value) {
    fileHandle.write((const uint8_t*)&value, 4);
  };
  auto putU16 = [&fileHandle](uint16_t value) {
    fileHandle.write((const uint8_t*)&value, 2);
  };

  const uint16_t pcmFormat = 1;
  const uint16_t channelCount = 1;
  const uint16_t sampleBits = 16;
  const uint16_t frameBytes = channelCount * (sampleBits / 8);
  const uint32_t bytesPerSecond = sampleRate * channelCount * (sampleBits / 8);

  fileHandle.seek(0);

  // RIFF container chunk: total size excludes the first 8 bytes.
  putTag("RIFF");
  putU32(36 + dataBytes);
  putTag("WAVE");

  // Format chunk (16 bytes of payload).
  putTag("fmt ");
  putU32(16);
  putU16(pcmFormat);
  putU16(channelCount);
  putU32(sampleRate);
  putU32(bytesPerSecond);
  putU16(frameBytes);
  putU16(sampleBits);

  // Data chunk header; the samples themselves are already in the file.
  putTag("data");
  putU32(dataBytes);
}
/**
 * FreeRTOS task that records microphone audio to WAV_FILE_PATH on the SD card.
 *
 * The task idles until recordingRequested becomes true, then:
 *   1. opens the WAV file and reserves 44 bytes for the header,
 *   2. starts the PDM receiver (16-bit mono at SAMPLE_RATE_HZ),
 *   3. copies samples to the file until recordingStopRequested becomes true
 *      (or an SD write comes up short),
 *   4. writes the real WAV header, closes the file and notifies the network
 *      task, if its handle is set, that a recording is ready.
 *
 * @param pv  Unused task parameter.
 */
void recorderTask(void* pv) {
  (void)pv;
  const size_t READ_BUF_SZ = 256;
  uint8_t readBuffer[READ_BUF_SZ];

  for (;;) {
    if (!recordingRequested) {
      vTaskDelay(pdMS_TO_TICKS(20));
      continue;
    }
    recordingRequested = false;
    recordingStopRequested = false;

    File wavFile = SD.open(WAV_FILE_PATH, FILE_WRITE);
    if (!wavFile) {
      dbgPrint("[REC] WAV open failed");
      vTaskDelay(pdMS_TO_TICKS(500));
      continue;
    }

    // Placeholder header; overwritten once the payload size is known.
    uint8_t zero44[44] = {0};
    wavFile.write(zero44, 44);

    pdmRecorder.setPinsPdmRx(PDM_CLOCK_PIN, PDM_DATA_PIN);
    if (!pdmRecorder.begin(I2S_MODE_PDM_RX, SAMPLE_RATE_HZ, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO)) {
      // Without a running receiver the read loop below would spin forever
      // with the file left open, so give up on this request instead.
      dbgPrint("[REC] PDM init failed");
      wavFile.close();
      vTaskDelay(pdMS_TO_TICKS(500));
      continue;
    }

    size_t totalBytes = 0;
    unsigned long startMs = nowMs();
    while (!recordingStopRequested) {
      int r = pdmRecorder.readBytes((char*)readBuffer, READ_BUF_SZ);
      if (r > 0) {
        // Count what actually reached the card so the header never claims
        // more audio than the file contains.
        size_t written = wavFile.write(readBuffer, (size_t)r);
        totalBytes += written;
        if (written != (size_t)r) {
          dbgPrint("[REC] SD write short, stopping");
          break;
        }
      }
      vTaskDelay(pdMS_TO_TICKS(2));
    }
    pdmRecorder.end();

    writeWavHeader(wavFile, SAMPLE_RATE_HZ, (uint32_t)totalBytes);
    wavFile.close();
    dbgPrint("[REC] Finished bytes=" + String(totalBytes) + " ms=" + String(nowMs() - startMs));

    if (networkTaskHandle) xTaskNotifyGive(networkTaskHandle);
  }
}
/**
 * Uploads the recorded WAV file to the transcription endpoint and extracts
 * the transcript.
 *
 * The file at WAV_FILE_PATH is streamed from the SD card as the "file" part
 * of a multipart/form-data POST to WHISPER_HOST/WHISPER_PATH, so the audio is
 * never held in RAM as a whole. The reply is collected into a String, the
 * first '{' is taken as the start of the JSON body and its "text" member is
 * returned.
 *
 * @param outTranscript  Receives the transcript on success; untouched on
 *                       failure.
 * @return true if a string "text" field was received, false on any file,
 *         connection, upload or parse error (each is logged via dbgPrint).
 */
bool streamFileToWhisper(String &outTranscript) {
  File wavFile = SD.open(WAV_FILE_PATH);
  if (!wavFile) {
    dbgPrint("[WHISPER] WAV open failed");
    return false;
  }

  WiFiClientSecure client;
  // NOTE(review): setInsecure() skips server certificate verification.
  client.setInsecure();
  if (!client.connect(WHISPER_HOST, 443)) {
    dbgPrint("[WHISPER] TLS connect failed");
    wavFile.close();
    return false;
  }

  String boundary = "----ESP32BOUNDARY";
  String head =
    "--" + boundary + "\r\n"
    "Content-Disposition: form-data; name=\"model\"\r\n\r\n"
    "whisper-large-v3-turbo\r\n"
    "--" + boundary + "\r\n"
    "Content-Disposition: form-data; name=\"file\"; filename=\"audio.wav\"\r\n"
    "Content-Type: audio/wav\r\n\r\n";
  String tail = "\r\n--" + boundary + "--\r\n";
  uint32_t contentLength = head.length() + (uint32_t)wavFile.size() + tail.length();

  client.printf("POST %s HTTP/1.1\r\n", WHISPER_PATH);
  client.printf("Host: %s\r\n", WHISPER_HOST);
  client.printf("Authorization: Bearer %s\r\n", groqApiKey);
  client.printf("Content-Type: multipart/form-data; boundary=%s\r\n", boundary.c_str());
  client.printf("Content-Length: %u\r\n", (unsigned)contentLength);
  // Ask the server to close the socket after replying; otherwise the read
  // loop below only ends once its idle timeout expires.
  client.print("Connection: close\r\n");
  client.print("\r\n");
  client.print(head);

  // Stream the file in small chunks, retrying partial writes and aborting
  // as soon as the connection stops accepting data.
  uint8_t buffer[512];
  bool uploadOk = true;
  while (uploadOk && wavFile.available()) {
    int r = wavFile.read(buffer, sizeof(buffer));
    if (r <= 0) break;
    size_t sent = 0;
    while (sent < (size_t)r) {
      size_t w = client.write(buffer + sent, (size_t)r - sent);
      if (w == 0) {
        uploadOk = false;
        break;
      }
      sent += w;
    }
  }
  wavFile.close();
  if (!uploadOk) {
    dbgPrint("[WHISPER] Upload failed");
    client.stop();
    return false;
  }
  client.print(tail);

  // Collect the reply until the peer closes, 5 s pass without data, or the
  // response grows beyond 128 KiB.
  String response;
  unsigned long lastRead = nowMs();
  while (client.connected() || client.available()) {
    if (client.available()) {
      response += client.readStringUntil('\n');
      lastRead = nowMs();
      if (response.length() > 128 * 1024) break;
    } else {
      if (nowMs() - lastRead > 5000) break;
      delay(5);
    }
  }
  client.stop();

  int jsonStart = response.indexOf('{');
  if (jsonStart < 0) {
    dbgPrint("[WHISPER] No JSON in response");
    return false;
  }
  String jsonPart = response.substring(jsonStart);

  // Heap-allocated document: this function runs on networkTask, whose whole
  // stack is 8192 bytes, so an 8 KB document must not live on the stack.
  DynamicJsonDocument doc(8192);
  DeserializationError err = deserializeJson(doc, jsonPart);
  if (err) {
    dbgPrint("[WHISPER] JSON parse error");
    return false;
  }

  // as<const char*>() yields nullptr when "text" is missing or not a string.
  const char* text = doc["text"].as<const char*>();
  if (!text) {
    dbgPrint("[WHISPER] No text field");
    return false;
  }
  outTranscript = text;
  dbgPrint("[WHISPER] Transcript: " + outTranscript);
  return true;
}
bool queryGroqChat(const String &promptText, String &outReply) {
String historyData = "";
if (SD.cardType() != CARD_NONE && SD.exists("/chat_hist.txt")) {
File histFile = SD.open("/chat_hist.txt", FILE_READ);
while (histFile.available() && historyData.length() < 1000) {
historyData += histFile.readStringUntil('\n') + "\n";
}
histFile.close();
}
WiFiClientSecure client;
client.setInsecure();
HTTPClient http;
if (!http.begin(client, GROQ_CHAT_URL)) {
dbgPrint("[CHAT] HTTP begin failed");
return false;
}
http.addHeader("Content-Type", "application/json");
http.addHeader("Authorization", String("Bearer ") + groqApiKey);
DynamicJsonDocument body(12288);
body["model"] = "openai/gpt-oss-20b";
JsonArray messages = body.createNestedArray("messages");
JsonObject m = messages.createNestedObject();
m["role"] = "user";
// Feel free to customise this prompt as much or as little as you want. Make sure to include the bit about keeping it inder 120 characters, as WitAITTS cuts it off there.
String therapyPrompt =
"Act as a supportive therapy bot focused on social anxiety. Provide short, practical steps, breathing cues, grounding exercises, and empathetic reflections. Keep replies concise and calm and under 120 characters. Avoid medical diagnosis. If a crisis is indicated, advise seeking immediate professional help.";
String fullPrompt = (historyData.length() > 0) ? ("HISTORY:\n" + historyData + "\n") : "";
fullPrompt += therapyPrompt + "\n" + promptText;
if (fullPrompt.length() > 2000) fullPrompt = fullPrompt.substring(fullPrompt.length() - 2000);
m["content"] = fullPrompt;
String payload;
serializeJson(body, payload);
int code = http.POST(payload);
if (code != 200) {
dbgPrint("[CHAT] HTTP Error " + String(code) + ": " + http.getString());
http.end();
return false;
}
String resp = http.getString();
http.end();
DynamicJsonDocument res(12288);
if (deserializeJson(res, resp)) {
dbgPrint("[CHAT] JSON parse error");
return false;
}
if (res["choices"][0]["message"].containsKey("content")) {
outReply = res["choices"][0]["message"]["content"].as<String>();
if (SD.cardType() != CARD_NONE) {
File f = SD.open("/chat_hist.txt", FILE_WRITE);
if (f) {
f.println("U: " + promptText);
f.println("A: " + outReply);
f.close();
}
}
return true;
}
return false;
}
void handleAiReply(const String &replyText) {
dbgPrint("[AI] Reply: " + replyText);
ttsEngine.speak(replyText);
}
void networkTask(void* pv) {
for (;;) {
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
String transcript;
if (!streamFileToWhisper(transcript)) {
dbgPrint("[NET] Transcription failed");
continue;
}
String aiReply;
if (!queryGroqChat(transcript, aiReply)) {
dbgPrint("[NET] Chat failed");
continue;
}
handleAiReply(aiReply);
}
}
/**
 * Joins the configured Wi-Fi network in station mode.
 *
 * Polls the connection state every 500 ms and gives up after 30 seconds.
 * The outcome is only logged; nothing is returned to the caller.
 */
void connectToWiFi() {
  WiFi.disconnect(true);
  WiFi.mode(WIFI_STA);
  WiFi.begin(wifiSsid, wifiPassword);

  const unsigned long begunAt = millis();
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) break;
    if (!(millis() - begunAt < 30000)) break;
    delay(500);
    yield();
  }

  const bool linked = (WiFi.status() == WL_CONNECTED);
  if (linked) {
    dbgPrint("[WIFI] Connected");
  } else {
    dbgPrint("[WIFI] Connection failed");
  }
}
void setup() {
Serial.begin(115200);
delay(1000);
SPI.begin();
if (!SD.begin(SD_CS_PIN)) {
dbgPrint("[SETUP] SD init failed");
}
connectToWiFi();
ttsEngine.begin(wifiSsid, wifiPassword, witToken);
ttsEngine.setVoice("wit$British Butler");
ttsEngine.setStyle("default");
ttsEngine.setSpeed(100);
ttsEngine.setPitch(100);
ttsEngine.setGain(1.0);
ttsEngine.printConfig();
ttsEngine.setDebugLevel(DEBUG_VERBOSE);
xTaskCreatePinnedToCore(recorderTask, "RecorderTask", 4096, nullptr, 2, &recorderTaskHandle, 1);
xTaskCreatePinnedToCore(networkTask, "NetworkTask", 8192, nullptr, 2, &networkTaskHandle, 1);
ttsEngine.speak("System online");
}
// Arduino main loop: its only job is to keep servicing the TTS engine.
void loop() {
  ttsEngine.loop();
}