diff --git a/faster_whisper/vad.py b/faster_whisper/vad.py index 5510884e..b3a0fb2a 100644 --- a/faster_whisper/vad.py +++ b/faster_whisper/vad.py @@ -256,6 +256,8 @@ def get_initial_states(self, batch_size: int): return state, context def __call__(self, x, states, sr: int): + state, context = states + if len(x.shape) == 1: x = np.expand_dims(x, 0) if len(x.shape) > 2: @@ -264,8 +266,6 @@ def __call__(self, x, states, sr: int): ) if sr / x.shape[1] > 31.25: raise ValueError("Input audio chunk is too short") - - state, context = states ort_inputs = { "input": x, @@ -274,6 +274,5 @@ def __call__(self, x, states, sr: int): } out, state = self.session.run(None, ort_inputs) - out = np.array(out, dtype='float32') - return out, state + return out, states