Up

elixir-nx · Mar 11, 2024 · 4155540 · 4155540
1 parent 185c18a
commit 4155540
Showing 1 changed file with 4 additions and 2 deletions.
diff --git a/lib/bumblebee/audio/speech_to_text_whisper.ex b/lib/bumblebee/audio/speech_to_text_whisper.ex
@@ -306,8 +306,10 @@ defmodule Bumblebee.Audio.SpeechToTextWhisper do
     for {token_id, idx} <- Enum.with_index(token_ids, 1), token_id, do: {idx, token_id}
   end
 
-  # Takes a stream of continous tensor chunks and produces a stream of
-  # overlapping chunks
+  # Takes a stream of continuous tensor chunks and produces a stream
+  # of overlapping chunks. As a result we get somewhat overlapping
+  # transcriptions, which we merge at the edges to improve the final
+  # transcription quality.
   defp chunk_input(stream, sampling_rate, chunk_num_seconds, context_num_seconds) do
     chunk_length = floor(chunk_num_seconds * sampling_rate)
     context_left = floor(context_num_seconds * sampling_rate)