From 9ab50cea24008122300f9ba616a5263b7dfdf253 Mon Sep 17 00:00:00 2001 From: Hoon Date: Fri, 28 Jun 2024 15:38:02 +0900 Subject: [PATCH] =?UTF-8?q?=E3=84=B9=20SileroVADModel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- faster_whisper/vad.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/faster_whisper/vad.py b/faster_whisper/vad.py index 5510884e..b3a0fb2a 100644 --- a/faster_whisper/vad.py +++ b/faster_whisper/vad.py @@ -256,6 +256,8 @@ def get_initial_states(self, batch_size: int): return state, context def __call__(self, x, states, sr: int): + state, context = states + if len(x.shape) == 1: x = np.expand_dims(x, 0) if len(x.shape) > 2: @@ -264,8 +266,6 @@ def __call__(self, x, states, sr: int): ) if sr / x.shape[1] > 31.25: raise ValueError("Input audio chunk is too short") - - state, context = states ort_inputs = { "input": x, @@ -274,6 +274,5 @@ def __call__(self, x, states, sr: int): } out, state = self.session.run(None, ort_inputs) - out = np.array(out, dtype='float32') - return out, state + return out, states