From 519e2467f86b323de69b11ce5247618030695e68 Mon Sep 17 00:00:00 2001 From: DavidGOrtega Date: Wed, 22 Nov 2023 18:57:23 +0100 Subject: [PATCH] TTS improvements (#19) * wip * prerelease to be checked once error handling is fixed * remove unused * remove package * remove duration * fixed chunks 0 * reorganise * refactor * remove json --- packages/008/.gitignore | 1 + packages/008/src/008Q.js | 37 +++++-- packages/008/src/008QWorker.js | 4 +- packages/008/src/components/Phone/index.js | 123 +++++++++++++-------- 4 files changed, 108 insertions(+), 57 deletions(-) diff --git a/packages/008/.gitignore b/packages/008/.gitignore index 656f852..d0130e0 100644 --- a/packages/008/.gitignore +++ b/packages/008/.gitignore @@ -47,3 +47,4 @@ yarn-error.* /web/*.wav /web/*.ogg /web/*.mp3 +/web/*.bin diff --git a/packages/008/src/008Q.js b/packages/008/src/008Q.js index 82d45ea..a4234c2 100644 --- a/packages/008/src/008Q.js +++ b/packages/008/src/008Q.js @@ -5,20 +5,24 @@ const CACHE = {}; const S3Q = 'https://kunziteq.s3.gra.perf.cloud.ovh.net'; export const wavBytes = async ({ chunks }) => { - // TODO: flatten 2 channels - let arrayBuffer = await chunks[0].arrayBuffer(); - const audioContext = new AudioContext(); - const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); - const wavBlob = new Blob([toWav(audioBuffer)], { type: 'audio/wav' }); + const buffer = await new Blob(chunks).arrayBuffer(); - arrayBuffer = await wavBlob.arrayBuffer(); + const audioContext = new AudioContext({ + sampleRate: 16000, + channelCount: 1, + echoCancellation: true, + autoGainControl: true, + noiseSuppression: true + }); - return new Uint8Array(arrayBuffer); + const resampled = await audioContext.decodeAudioData(buffer); + return new Uint8Array(toWav(resampled)); }; export const ttsInfer = async ({ chunks, url, + onStream, audio = [], bin = `${S3Q}/ttsb.bin`, data = `${S3Q}/tts.json` @@ -41,14 +45,31 @@ export const ttsInfer = async ({ if (url) audio = await fetchBytes(url); if 
(chunks) audio = await wavBytes({ chunks }); + const consolelog = console.log; + console.log = () => {}; + const consolewarn = console.warn; + console.warn = () => {}; + await whisper.default(); const builder = new whisper.SessionBuilder(); const session = await builder.setModel(model).setTokenizer(tokenizer).build(); - const { segments } = await session.run(audio); + let segments = []; + + if (onStream) { + await session.stream(audio, false, segment => { + onStream?.(segment); + segments.push(segment); + }); + } else { + ({ segments } = await session.run(audio)); + } session.free(); + console.warn = consolewarn; + console.log = consolelog; + return segments; }; diff --git a/packages/008/src/008QWorker.js b/packages/008/src/008QWorker.js index 9b5545b..2a8da73 100755 --- a/packages/008/src/008QWorker.js +++ b/packages/008/src/008QWorker.js @@ -4,8 +4,8 @@ let BUSY = false; const QUEUE = []; const process = async () => { - console.log('[008Q] Processing...'); - if (BUSY || !QUEUE.length) return; + // TODO: put back busy check once error handling is fixed + if (!QUEUE.length) return; try { BUSY = true; diff --git a/packages/008/src/components/Phone/index.js b/packages/008/src/components/Phone/index.js index 73a54b4..57a2468 100644 --- a/packages/008/src/components/Phone/index.js +++ b/packages/008/src/components/Phone/index.js @@ -1,6 +1,7 @@ -import _ from 'lodash'; import React from 'react'; import { View } from 'react-native'; + +import _ from 'lodash'; import { UA } from 'sip.js'; import { @@ -409,6 +410,8 @@ class Phone extends React.Component { }; processRecording = ({ session }) => { + const { webhooks } = this.state; + const type = 'audio/webm'; const chunksBlob = chunks => { @@ -417,62 +420,84 @@ return blobToDataURL(new Blob(chunks, { type })); }; - const streamIn = new MediaStream(); - const streamOut = new MediaStream(); - let recorder; const chunks = []; + const streamIn = new MediaStream(); let recorderIn; const 
chunksIn = []; + const streamOut = new MediaStream(); let recorderOut; const chunksOut = []; session.on('accepted', async () => { - const { peerConnection } = session.sessionDescriptionHandler; - const audioContext = new AudioContext(); - const multi = audioContext.createMediaStreamDestination(); - - const addTracks = (tracks, stream, recorder, chunks) => - tracks.forEach(({ track }) => { - stream.addTrack(track); - - const src = audioContext.createMediaStreamSource(stream); - src.connect(multi); - - recorder = new MediaRecorder(stream); - recorder.ondataavailable = ({ data }) => chunks.push(data); - recorder.start(); - }); + try { + const { peerConnection } = session.sessionDescriptionHandler; + const audioContext = new AudioContext(); + const multi = audioContext.createMediaStreamDestination(); + + const addTracks = (tracks, stream, recorder, chunks) => + tracks.forEach(({ track }) => { + stream.addTrack(track); + + const src = audioContext.createMediaStreamSource(stream); + src.connect(multi); + + recorder = new MediaRecorder(stream); + recorder.ondataavailable = ({ data }) => chunks.push(data); + recorder.start(); + recorder.tsStart = Date.now(); + }); - addTracks(peerConnection.getSenders(), streamOut, recorderOut, chunksOut); - addTracks(peerConnection.getReceivers(), streamIn, recorderIn, chunksIn); - - recorder = new MediaRecorder(multi.stream, { mimeType: type }); - recorder.ondataavailable = ({ data }) => chunks.push(data); - recorder.onstop = async () => { - const id = session.cdr?.id; - const blob = await chunksBlob(chunks); - this.emit({ type: 'phone:recording', data: { audio: { id, blob } } }); - - this.qworker.postMessage({ - id, - audio: { - remote: await wavBytes({ chunks: chunksIn }), - local: await wavBytes({ chunks: chunksOut }) + addTracks( + peerConnection.getReceivers(), + streamIn, + recorderIn, + chunksIn + ); + addTracks( + peerConnection.getSenders(), + streamOut, + recorderOut, + chunksOut + ); + + recorder = new 
MediaRecorder(multi.stream, { mimeType: type }); + recorder.ondataavailable = ({ data }) => chunks.push(data); + recorder.onstop = async () => { + try { + const id = session.cdr?.id; + const blob = await chunksBlob(chunks); + this.emit({ + type: 'phone:recording', + data: { audio: { id, blob } } + }); + + if (webhooks?.length) { + this.qworker.postMessage({ + id, + audio: { + remote: await wavBytes({ chunks: chunksIn }), + local: await wavBytes({ chunks: chunksOut }) + } + }); + } + } catch (err) { + console.log(err); } - }); - }; - - recorder.start(); - }); + }; - session.on('terminated', () => { - recorder?.stop(); + session.on('terminated', () => { + recorder.stop(); + recorderIn.stop(); + recorderOut.stop(); + }); - recorderIn?.stop(); - recorderOut?.stop(); + recorder.start(); + } catch (err) { + console.error(err); + } }); }; @@ -517,11 +542,13 @@ class Phone extends React.Component { nickname, avatar, - allowAutoanswer, - autoanswer, allowTransfer, allowBlindTransfer, allowVideo, + allowAutoanswer, + autoanswer, + + webhooks, contactsDialer: contacts, contactsDialerFilter: contactsFilter @@ -546,11 +573,13 @@ class Phone extends React.Component { nickname, avatar, - allowAutoanswer, - autoanswer, allowBlindTransfer, allowTransfer, allowVideo, + allowAutoanswer, + autoanswer, + + webhooks, contacts, contactsFilter