Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ui): add page to talk with voice, transcription, and tts #2520

Merged
merged 3 commits into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions core/http/routes/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
// Render index
return c.Render("views/chat", summary)
})

app.Get("/talk/", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()

if len(backendConfigs) == 0 {
// If no model is available redirect to the index which suggests how to install models
return c.Redirect("/")
}

summary := fiber.Map{
"Title": "LocalAI - Talk",
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/talk", summary)
})

app.Get("/chat/", auth, func(c *fiber.Ctx) error {

backendConfigs := cl.GetAllBackendConfigs()
Expand Down
191 changes: 191 additions & 0 deletions core/http/static/talk.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@

const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');

let mediaRecorder;
let audioChunks = [];
let isRecording = false;
let conversationHistory = [];
let resetTimer;

function getApiKey() {
return document.getElementById('apiKey').value;
}

function getModel() {
return document.getElementById('modelSelect').value;
}

function getWhisperModel() {
return document.getElementById('whisperModelSelect').value;
}

function getTTSModel() {
return document.getElementById('ttsModelSelect').value;
}

function resetConversation() {
conversationHistory = [];
console.log("Conversation has been reset.");
clearTimeout(resetTimer);
}

function setResetTimer() {
clearTimeout(resetTimer);
resetTimer = setTimeout(resetConversation, 300000); // Reset after 5 minutes
}

recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', resetConversation);

function toggleRecording() {
if (!isRecording) {
startRecording();
} else {
stopRecording();
}
}

async function startRecording() {
document.getElementById("recording").style.display = "block";
document.getElementById("resetButton").style.display = "none";
if (!navigator.mediaDevices) {
alert('MediaDevices API not supported!');
return;
}
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder = new MediaRecorder(stream);
audioChunks = [];
mediaRecorder.ondataavailable = (event) => {
audioChunks.push(event.data);
};
mediaRecorder.start();
recordButton.textContent = 'Stop Recording';
// add class bg-red-500 to recordButton
recordButton.classList.add("bg-gray-500");

isRecording = true;
}

function stopRecording() {
mediaRecorder.stop();
mediaRecorder.onstop = async () => {
document.getElementById("recording").style.display = "none";
document.getElementById("recordButton").style.display = "none";

document.getElementById("loader").style.display = "block";
const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
document.getElementById("statustext").textContent = "Processing audio...";
const transcript = await sendAudioToWhisper(audioBlob);
console.log("Transcript:", transcript);
document.getElementById("statustext").textContent = "Seems you said: " + transcript+ ". Generating response...";
const responseText = await sendTextToChatGPT(transcript);

console.log("Response:", responseText);
document.getElementById("statustext").textContent = "Response generated: '" + responseText + "'. Generating audio response...";

const ttsAudio = await getTextToSpeechAudio(responseText);
playAudioResponse(ttsAudio);

recordButton.textContent = 'Record';
// remove class bg-red-500 from recordButton
recordButton.classList.remove("bg-gray-500");
isRecording = false;
document.getElementById("loader").style.display = "none";
document.getElementById("recordButton").style.display = "block";
document.getElementById("resetButton").style.display = "block";
document.getElementById("statustext").textContent = "Press the record button to start recording.";
};
}

function submitKey(event) {
event.preventDefault();
localStorage.setItem("key", document.getElementById("apiKey").value);
document.getElementById("apiKey").blur();
}

document.getElementById("key").addEventListener("submit", submitKey);


storeKey = localStorage.getItem("key");
if (storeKey) {
document.getElementById("apiKey").value = storeKey;
} else {
document.getElementById("apiKey").value = null;
}


async function sendAudioToWhisper(audioBlob) {
const formData = new FormData();
formData.append('file', audioBlob);
formData.append('model', getWhisperModel());
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/audio/transcriptions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`
},
body: formData
});

const result = await response.json();
console.log("Whisper result:", result)
return result.text;
}

async function sendTextToChatGPT(text) {
conversationHistory.push({ role: "user", content: text });
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: getModel(),
messages: conversationHistory
})
});

const result = await response.json();
const responseText = result.choices[0].message.content;
conversationHistory.push({ role: "assistant", content: responseText });

setResetTimer();

return responseText;
}

async function getTextToSpeechAudio(text) {
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/audio/speech', {

method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
// "backend": "string",
input: text,
model: getTTSModel(),
// "voice": "string"
})
});

const audioBlob = await response.blob();
return audioBlob; // Return the blob directly
}

function playAudioResponse(audioBlob) {
const audioUrl = URL.createObjectURL(audioBlob);
audioPlayback.src = audioUrl;
audioPlayback.hidden = false;
audioPlayback.play();
}

2 changes: 2 additions & 0 deletions core/http/views/partials/navbar.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
Expand All @@ -32,6 +33,7 @@
<a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
Expand Down
108 changes: 108 additions & 0 deletions core/http/views/talk.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<!doctype html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/talk.js"></script>
<style>
body {
overflow: hidden;
}
</style>
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">

{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">

<div class="flex items-center justify-center">

<div x-show="component === 'menu'" id="menu">

<button @click="component = 'key'" title="Update API key"
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
>Set API Key🔑</button>

</div>

<form x-show="component === 'key'" id="key">
<input
type="password"
id="apiKey"
name="apiKey"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
placeholder="API Key"
x-model.lazy="key"
/>
<button @click="component = 'menu'" type="submit" title="Save API key">
<i class="fa-solid fa-arrow-right"></i>
</button>
</form>
</div>
</div>

<div class="flex items-center justify-center">
<div class="w-full p-4 max-w-md border-t border-gray-700 ">
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4">
<div id="recording" class="" style="display: none;">
<i class="fa-solid fa-microphone animate-pulse text-red-700"></i>
<span class="text-white-700 text-sm font-bold mb-2">Recording... press "Stop recording" to stop</span>
</div>
<div id="loader" class="my-2 loader" style="display: none;"></div>
<div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div>
<div class="mb-4" >
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
<select id="modelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<option value="" disabled class="text-gray-400" >Select a model</option>

{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>

<div class="mb-4" >
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label>
<select id="whisperModelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"

>
<option value="" disabled class="text-gray-400" >Select a model</option>

{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>


<div class="mb-4" >
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label>
<select id="ttsModelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>


<button id="recordButton"
class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
><i class="fa-solid fa-microphone pr-2"></i>Talk</button>
<a id="resetButton"
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
href="#"
>Reset conversation</a>
<audio id="audioPlayback" controls hidden></audio>

</div>
</div>
</div>
</div>
</body>
</html>