diff --git a/ChatTTS/core.py b/ChatTTS/core.py index 4ab901659..a6195acb6 100644 --- a/ChatTTS/core.py +++ b/ChatTTS/core.py @@ -415,7 +415,12 @@ def _infer( else: yield wavs if stream: - yield wavs[:, length:] + new_wavs = wavs[:, length:] + # Identify rows with non-zero elements using np.any + # keep_rows = np.any(array != 0, axis=1) + keep_cols = np.sum(new_wavs != 0, axis=0) > 0 + # Filter both rows and columns using slicing + yield new_wavs[:][:, keep_cols] @torch.inference_mode() def _vocos_decode(self, spec: torch.Tensor) -> np.ndarray: