Skip to content

Javascript bookmarklet (and other demos) using Silero VAD to accelerate parts of a video where there is no spoken content.

License

Notifications You must be signed in to change notification settings

Quantizr/SileroSilenceSkipperDemo

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

21 Commits
 
 
 
 
 
 
 
 

Repository files navigation

SileroSilenceSkipperDemo

JavaScript bookmarklet (and other demos) using Silero VAD to accelerate parts of a video where there is no spoken content.

Browser extensions like SilenceSkipper, which rely on volume thresholds, cannot detect the difference between music and dialogue. As such, they often slow down for loud music and speed up for quiet talking, the opposite of what we want. Silero VAD is great at identifying when a person is talking, even with music or other noises in the background, making it ideal for watching content where spoken content is most important, for example, lectures or podcasts.

NEW: How To use

Currently works only on Chromium-based browsers (Google Chrome, Microsoft Edge, Brave, etc.)

  1. Select all of the following code and drag it to your bookmarks bar (press Ctrl + Shift + B to show the bookmarks bar if it is hidden)
javascript: (() => {
  /*
   * Silero VAD silence-skipper bookmarklet.
   *
   * Loads onnxruntime-web and @ricky0123/vad-web from jsDelivr, feeds the
   * audio of the first <video> on the page into the VAD and switches the
   * playback rate between a "speech" speed and a faster "skip" speed.
   * A small draggable panel exposes both speeds as sliders.
   *
   * Page-visible interface: window.VADInitialized (boolean flag) and
   * window.resetVAD() (re-attach to the current video; invoked when the
   * bookmarklet is clicked a second time).
   *
   * Only block comments and explicit semicolons are used here, on the
   * assumption that line breaks may not survive being stored as a
   * bookmark URL (TODO confirm per browser).
   */
  if (window.VADInitialized) {
    console.log("VAD already intialized, resetting...");
    window.resetVAD();
    return;
  }
  window.VADInitialized = true;

  /* Tuning constants (values carried over from the original script). */
  const SPEECH_START_THRESHOLD = 0.5;
  const SPEECH_END_THRESHOLD = 0.5;
  const SILENT_FRAMES_BEFORE_SKIP = 2;
  const SPEEDUPS_BEFORE_RESYNC = 10;

  /* Mutable state shared by the VAD callback, the video listeners and the UI. */
  let video = null;
  let sourceNode = null;
  let vadNode = null;
  let skipSpeed = 3;
  let speechSpeed = 1.5;
  let currentRate = 1;
  let silentFrames = 0;
  let speedUpCount = 0;
  let vadReady = false;
  let skipStartTime = null;
  let savedMs = 0;
  let controlsShown = false;
  let dragging = false;
  let dragOffsetX = 0;
  let dragOffsetY = 0;
  const audioContext = new AudioContext({ sampleRate: 44100 });
  /* Videos that already carry our listeners, so resetVAD never stacks duplicates. */
  const wiredVideos = new WeakSet();

  /* ---- Settings panel ---- */
  const panel = document.createElement("div");
  Object.assign(panel.style, {
    position: "fixed",
    bottom: "10px",
    right: "10px",
    zIndex: "9999",
    backgroundColor: "rgba(255, 255, 255, 0.5)",
    padding: "10px",
    borderRadius: "5px",
    boxShadow: "0 0 10px rgba(0, 0, 0, 0.3)"
  });
  document.body.appendChild(panel);

  const header = document.createElement("div");
  Object.assign(header.style, {
    display: "flex",
    alignItems: "center",
    justifyContent: "space-between",
    cursor: "move"
  });

  const title = document.createElement("div");
  title.textContent = "VAD Script Settings";
  title.style.fontSize = "14px";
  title.style.fontWeight = "bold";
  header.appendChild(title);

  const minimizeButton = document.createElement("button");
  minimizeButton.textContent = "-";
  minimizeButton.style.cursor = "pointer";
  minimizeButton.style.marginLeft = "10px";
  minimizeButton.addEventListener("click", toggleMinimize);
  header.appendChild(minimizeButton);
  panel.appendChild(header);

  const statusLine = document.createElement("div");
  statusLine.textContent = "Loading VAD scripts...";
  statusLine.style.fontSize = "14px";
  panel.appendChild(statusLine);

  const speechSlider = createSlider("Speech Speed", 1, 5, speechSpeed, 0.25, setSpeechSpeed);
  speechSlider.style.display = "none";
  panel.appendChild(speechSlider);

  const skipSlider = createSlider("Skip Speed", 1, 5, skipSpeed, 0.25, setSkipSpeed);
  skipSlider.style.display = "none";
  panel.appendChild(skipSlider);

  header.addEventListener("mousedown", startDrag);
  document.addEventListener("mousemove", handleDrag);
  document.addEventListener("mouseup", endDrag);

  /* Remember where inside the panel the pointer grabbed it. */
  function startDrag(event) {
    const rect = panel.getBoundingClientRect();
    dragging = true;
    dragOffsetX = event.clientX - rect.left;
    dragOffsetY = event.clientY - rect.top;
  }

  /* Move the panel with the pointer while a drag is in progress. */
  function handleDrag(event) {
    if (!dragging) {
      return;
    }
    panel.style.left = `${event.clientX - dragOffsetX}px`;
    panel.style.top = `${event.clientY - dragOffsetY}px`;
    panel.style.bottom = "auto";
    panel.style.right = "auto";
  }

  /*
   * Finish a drag and anchor the panel to the nearest viewport edges, so it
   * keeps its distance from that corner. Ignored when no drag was active
   * (this listener sees every mouseup on the document).
   */
  function endDrag() {
    if (!dragging) {
      return;
    }
    dragging = false;
    const rect = panel.getBoundingClientRect();
    const viewHeight = document.documentElement.clientHeight;
    const viewWidth = document.documentElement.clientWidth;
    if (rect.top < viewHeight / 2) {
      panel.style.top = `${rect.top}px`;
      panel.style.bottom = "auto";
    } else {
      panel.style.top = "auto";
      panel.style.bottom = `${viewHeight - rect.bottom}px`;
    }
    if (rect.left < viewWidth / 2) {
      panel.style.left = `${rect.left}px`;
      panel.style.right = "auto";
    } else {
      panel.style.left = "auto";
      panel.style.right = `${viewWidth - rect.right}px`;
    }
  }

  /*
   * Build a labelled range slider.
   * label: caption text; min, max, step: range bounds; value: initial value;
   * onChange: called with the new value as a Number on every input event.
   * Returns the wrapper element (not yet attached to the document).
   */
  function createSlider(label, min, max, value, step, onChange) {
    const wrapper = document.createElement("div");
    const labelRow = document.createElement("div");
    labelRow.style.display = "flex";
    labelRow.style.justifyContent = "space-between";

    const nameEl = document.createElement("div");
    nameEl.textContent = label;
    nameEl.style.fontSize = "14px";
    labelRow.appendChild(nameEl);

    const valueEl = document.createElement("div");
    valueEl.textContent = `${parseFloat(value).toFixed(2)}x`;
    valueEl.style.fontSize = "14px";
    labelRow.appendChild(valueEl);
    wrapper.appendChild(labelRow);

    const input = document.createElement("input");
    input.type = "range";
    input.min = min;
    input.max = max;
    /* step must be set before value: otherwise 1.5 is rounded against the default step of 1. */
    input.step = step;
    input.value = value;
    input.style.width = "150px";
    input.style.marginBottom = "5px";
    input.addEventListener("input", () => {
      const shown = parseFloat(input.value).toFixed(2);
      valueEl.textContent = `${shown}x`;
      /* Hand over a Number so the rate comparisons below stay numeric. */
      onChange(Number(shown));
    });
    wrapper.appendChild(input);
    return wrapper;
  }

  /* Collapse or expand everything below the header and the status line. */
  function toggleMinimize() {
    const collapsible = Array.from(panel.children).slice(2);
    const isHidden = collapsible[0].style.display === "none";
    collapsible.forEach((child) => {
      child.style.display = isHidden ? "block" : "none";
    });
    minimizeButton.textContent = isHidden ? "-" : "+";
  }

  /* Replace the status line with the two sliders (only the first time). */
  function showControls() {
    if (controlsShown) {
      return;
    }
    controlsShown = true;
    statusLine.style.display = "none";
    speechSlider.style.display = "block";
    skipSlider.style.display = "block";
  }

  /* Slider callbacks; clearing skipStartTime drops the pending time-saved sample. */
  function setSpeechSpeed(value) {
    speechSpeed = value;
    skipStartTime = null;
  }

  function setSkipSpeed(value) {
    skipSpeed = value;
    skipStartTime = null;
  }

  /* ---- Audio wiring ---- */

  /*
   * Route the current video's audio into the VAD through a fresh
   * MediaStreamAudioSourceNode and a one-input channel merger.
   * Does nothing until the VAD exists and a video is known.
   */
  function connectVideoAudio() {
    if (!vadReady || video === null) {
      return;
    }
    audioContext.resume();
    if (sourceNode !== null) {
      /* Drop the previous capture so stale sources do not pile up on the VAD input. */
      sourceNode.disconnect();
    }
    sourceNode = new MediaStreamAudioSourceNode(audioContext, {
      mediaStream: video.captureStream()
    });
    const merger = audioContext.createChannelMerger(1);
    sourceNode.connect(merger);
    vadNode.receive(merger);
  }

  /*
   * Per-frame VAD callback. Slows to speechSpeed when the speech probability
   * rises above the start threshold, and speeds up to skipSpeed after
   * SILENT_FRAMES_BEFORE_SKIP frames below the end threshold.
   */
  function handleFrame(probabilities) {
    if (video === null) {
      return;
    }
    if (probabilities.isSpeech > SPEECH_START_THRESHOLD) {
      if (currentRate !== speechSpeed) {
        currentRate = speechSpeed;
        video.playbackRate = currentRate;
        silentFrames = 0;
        const now = video.currentTime;
        if (skipStartTime !== null) {
          const skippedMs = Math.round(1000 * (now - skipStartTime));
          console.log(`Sped up video time: ${skippedMs}ms`);
          savedMs += Math.round((skippedMs * (skipSpeed - 1)) / (skipSpeed * speechSpeed));
        }
        console.log("Speech started - slowing down - time saved ", savedMs / 1000);
      } else if (silentFrames > 0) {
        silentFrames--;
      }
    }
    if (probabilities.isSpeech < SPEECH_END_THRESHOLD && currentRate !== skipSpeed) {
      silentFrames++;
      if (silentFrames >= SILENT_FRAMES_BEFORE_SKIP) {
        currentRate = skipSpeed;
        video.playbackRate = currentRate;
        console.log("End of speech - speeding up");
        speedUpCount++;
        if (speedUpCount >= SPEEDUPS_BEFORE_RESYNC) {
          /* Seeking to the current position forces the player to resync audio and video. */
          video.currentTime = video.currentTime;
          speedUpCount = 0;
          console.log("Resync video/audio");
        }
        skipStartTime = video.currentTime;
      }
    }
    if (video.playbackRate !== currentRate) {
      /* The page (or the player) changed the rate behind our back; put it back. */
      console.log("FALLBACK: Set playback speed");
      video.playbackRate = currentRate;
    }
  }

  /*
   * Attach play/pause/seeked/loadeddata listeners to a video exactly once.
   * Events from a video that is no longer the tracked one are ignored.
   */
  function attachVideoListeners(target) {
    if (wiredVideos.has(target)) {
      return;
    }
    wiredVideos.add(target);
    target.addEventListener("play", () => {
      if (target !== video) {
        return;
      }
      skipStartTime = null;
      console.log("Play event");
      if (!vadReady) {
        return;
      }
      if (sourceNode === null) {
        connectVideoAudio();
      }
      /* Start below zero so a couple of extra silent frames are needed right after play. */
      silentFrames = -2;
      vadNode.start();
    });
    target.addEventListener("pause", () => {
      if (target !== video) {
        return;
      }
      skipStartTime = null;
      console.log("Pause event");
      if (vadReady) {
        vadNode.pause();
      }
    });
    target.addEventListener("seeked", () => {
      if (target !== video) {
        return;
      }
      skipStartTime = null;
      console.log("Seeked event");
    });
    target.addEventListener("loadeddata", () => {
      if (target !== video) {
        return;
      }
      skipStartTime = null;
      console.log("LoadedData event");
      connectVideoAudio();
    });
  }

  /* ---- Script loading and start-up ---- */

  /* Report a failed CDN load in the console and in the panel. */
  function reportLoadError(event) {
    console.error("Unable to load VAD script", event);
    statusLine.textContent = "Unable to load VAD scripts...";
  }

  /* Load onnxruntime-web, then continue with the VAD bundle. */
  function loadScript1() {
    const script = document.createElement("script");
    script.src = "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js";
    script.addEventListener("load", () => {
      console.log("Script 1 loaded");
      loadScript2();
    });
    script.addEventListener("error", reportLoadError);
    document.head.appendChild(script);
  }

  /* Load the vad-web bundle (defines the global "vad"), then start the VAD. */
  function loadScript2() {
    const script = document.createElement("script");
    script.src = "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web/dist/bundle.min.js";
    script.addEventListener("load", () => {
      console.log("Script 2 loaded");
      runVAD();
    });
    script.addEventListener("error", reportLoadError);
    document.head.appendChild(script);
  }

  /*
   * Create the VAD node and, if a video is present, hook it up.
   * When no video exists yet the VAD is still created, so a later
   * resetVAD() call can attach to a video that appears afterwards.
   */
  async function runVAD() {
    video = document.querySelector("video");
    if (video !== null) {
      attachVideoListeners(video);
    }
    try {
      vadNode = await vad.AudioNodeVAD.new(audioContext, {
        frameSamples: 512,
        positiveSpeechThreshold: 1,
        negativeSpeechThreshold: 0,
        onFrameProcessed: handleFrame
      });
    } catch (err) {
      console.error("Unable to initialize VAD", err);
      statusLine.textContent = "Unable to initialize VAD...";
      return;
    }
    /* Dead code kept on purpose: the original author uses it to give the bookmark a title. */
    if (0) {
      alert("Hacky way to add bookmarklet title: /VAD Silence Skipper;");
    }
    vadReady = true;
    if (video === null) {
      console.log("Unable to find video...");
      statusLine.textContent = "Unable to find video...";
      return;
    }
    showControls();
    if (!video.paused && sourceNode === null) {
      connectVideoAudio();
      vadNode.start();
    }
  }

  /*
   * Re-attach to whatever <video> is on the page now. Called when the
   * bookmarklet is clicked again after initialization. Leaves the current
   * state untouched when the page has no video.
   */
  window.resetVAD = function () {
    const found = document.querySelector("video");
    if (found === null) {
      console.log("Unable to find video...");
      return;
    }
    video = found;
    skipStartTime = null;
    attachVideoListeners(video);
    if (!vadReady) {
      /* runVAD picks up the video once the VAD has finished loading. */
      console.log("VAD is still loading...");
      return;
    }
    showControls();
    connectVideoAudio();
    vadNode.start();
  };

  loadScript1();
})();
  2. Click the bookmark when you are on a page with a video (e.g. YouTube) to accelerate the parts of the video without speech (helpful for lectures, presentations, code streams, etc.)
    • You will get a draggable overlay with controls in the bottom right
    • Click the bookmark again if the script is no longer tracking the video
    • Reload the page to get rid of the speedup

Alternate method (browser console)

Copy the code from here and paste it into the browser console on a page with a video. This will work on the current page until the page is refreshed (or the video changes).

(Old) Demo:

(Don't mind the goofy video; it was one of the first example MP4s I could find.) https://quantizr.github.io/SileroSilenceSkipperDemo/demo.html

About

Javascript bookmarklet (and other demos) using Silero VAD to accelerate parts of a video where there is no spoken content.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published