Support Whisper-PMFA

wenet-e2e · Aug 30, 2024 · c6fd891
1 parent fa0179b
commit c6fd891
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 11 deletions.
diff --git a/wespeaker/frontend/whisper_encoder.py b/wespeaker/frontend/whisper_encoder.py
@@ -80,14 +80,15 @@ def forward(
         q = self.query(x)
 
         if kv_cache is None or xa is None or self.key not in kv_cache:
-            # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
-            # otherwise, perform key/value projections for self- or
+            # hooks, if installed (i.e. kv_cache is not None),
+            # will prepend the cached kv tensors; otherwise,
+            # perform key/value projections for self- or
             # cross-attention as usual.
             k = self.key(x if xa is None else xa)
             v = self.value(x if xa is None else xa)
         else:
-            # for cross-attention, calculate keys and values once and reuse in
-            # subsequent calls.
+            # for cross-attention, calculate keys and values once
+            # and reuse in subsequent calls.
             k = kv_cache[self.key]
             v = kv_cache[self.value]
 
@@ -192,9 +193,9 @@ def forward(self, x: Tensor):
         x = F.gelu(self.conv2(x))
         x = x.permute(0, 2, 1)
 
-        # assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"
-        # ----------------------Change:Tailor the positional_embedding----------
-        assert x.shape[2:] == self.positional_embedding.shape[1:], "incorrect audio shape"
+        # ------------Change:Tailor the positional_embedding----------
+        assert x.shape[2:] == self.positional_embedding.shape[1:], \
+            "incorrect audio shape"
         if self.positional_embedding.shape[0] > x.shape[1]:
             temp_positional_embedding = self.positional_embedding[:x.shape[1], :]
         elif self.positional_embedding.shape[0] < x.shape[1]:
@@ -266,7 +267,9 @@ def _download_whisper_model(self, model_path='whisper_hub/large-v2.pt'):
             os.makedirs(download_dir)
         if not os.path.isfile(model_path):
             print("Downloading large-v2.pt ...")
-            url = 'https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt'
+            url = 'https://openaipublic.azureedge.net/main/whisper/models/' \
+                '81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/' \
+                'large-v2.pt'
 
             urllib.request.urlretrieve(url, model_path)
 

diff --git a/wespeaker/models/whisper_PMFA.py b/wespeaker/models/whisper_PMFA.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import numpy as np
 import torch
-import torch.nn.functional as F
-from torch import Tensor
 from torch import nn
 
 import wespeaker.models.pooling_layers as pooling_layers