From 9ab50cea24008122300f9ba616a5263b7dfdf253 Mon Sep 17 00:00:00 2001
From: Hoon <kihoon.kim.dev@gmail.com>
Date: Fri, 28 Jun 2024 15:38:02 +0900
Subject: [PATCH] =?UTF-8?q?=E3=84=B9=20SileroVADModel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 faster_whisper/vad.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/faster_whisper/vad.py b/faster_whisper/vad.py
index 5510884e..b3a0fb2a 100644
--- a/faster_whisper/vad.py
+++ b/faster_whisper/vad.py
@@ -256,6 +256,8 @@ def get_initial_states(self, batch_size: int):
         return state, context
 
     def __call__(self, x, states, sr: int):
+        state, context = states
+
         if len(x.shape) == 1:
             x = np.expand_dims(x, 0)
         if len(x.shape) > 2:
@@ -264,8 +266,6 @@ def __call__(self, x, states, sr: int):
             )
         if sr / x.shape[1] > 31.25:
             raise ValueError("Input audio chunk is too short")
-        
-        state, context = states
 
         ort_inputs = {
             "input": x,
@@ -274,6 +274,5 @@ def __call__(self, x, states, sr: int):
         }
 
         out, state = self.session.run(None, ort_inputs)
-        out = np.array(out, dtype='float32')
 
-        return out, state
+        return out, states