From 1a40f9c21c3afd1e6b5a32471c497a9edc7c16b1 Mon Sep 17 00:00:00 2001
From: Binbin Zhang
Date: Sun, 5 Nov 2023 09:02:01 +0800
Subject: [PATCH] normalize wav

---
 wespeaker/cli/speaker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wespeaker/cli/speaker.py b/wespeaker/cli/speaker.py
index 3c80a3af..fcb27c23 100644
--- a/wespeaker/cli/speaker.py
+++ b/wespeaker/cli/speaker.py
@@ -32,6 +32,7 @@ def __init__(self, model_path: str, resample_rate: int = 16000):
 
     def extract_embedding(self, audio_path: str):
         pcm, sample_rate = librosa.load(audio_path, sr=self.resample_rate)
+        pcm = pcm * (1 << 15)
         # NOTE: produce the same results as with torchaudio.compliance.kaldi
         feats = logfbank(
             pcm,