qdrant · NirantK · Oct 16, 2023 · Oct 16, 2023
diff --git a/fastembed/embedding.py b/fastembed/embedding.py
@@ -6,10 +6,10 @@
 from pathlib import Path
 from typing import Dict, Iterable, List, Union
 
-import onnxruntime as ort
 import numpy as np
+import onnxruntime as ort
 import requests
-from tokenizers import Tokenizer, AddedToken
+from tokenizers import AddedToken, Tokenizer
 from tqdm import tqdm
 
 
@@ -54,13 +54,23 @@ def list_supported_models(cls) -> List[Dict[str, Union[str, int]]]:
             {
                 "model": "BAAI/bge-small-en",
                 "dim": 384,
+                "description": "Fast English model",
+            },
+            {
+                "model": "BAAI/bge-small-en-v1.5",
+                "dim": 384,
                 "description": "Fast and Default English model",
             },
             {
                 "model": "BAAI/bge-base-en",
                 "dim": 768,
                 "description": "Base English model",
             },
+            {
+                "model": "BAAI/bge-base-en-v1.5",
+                "dim": 768,
+                "description": "Base English model, v1.5",
+            },
             {
                 "model": "sentence-transformers/all-MiniLM-L6-v2",
                 "dim": 384,
@@ -385,7 +395,7 @@ class DefaultEmbedding(FlagEmbedding):
 
     def __init__(
         self,
-        model_name: str = "BAAI/bge-small-en",
+        model_name: str = "BAAI/bge-small-en-v1.5",
         max_length: int = 512,
         cache_dir: str = None,
     ):

diff --git a/tests/test_onnx_embeddings.py b/tests/test_onnx_embeddings.py
@@ -2,10 +2,11 @@
 
 from fastembed.embedding import DefaultEmbedding, Embedding
 
-
 CANONICAL_VECTOR_VALUES = {
     "BAAI/bge-small-en": np.array([-0.0232, -0.0255,  0.0174, -0.0639, -0.0006]),
+    "BAAI/bge-small-en-v1.5": np.array([0.01522374, -0.02271799,  0.00860278, -0.07424029,  0.00386434]),
     "BAAI/bge-base-en": np.array([0.0115,  0.0372,  0.0295,  0.0121,  0.0346]),
+    "BAAI/bge-base-en-v1.5": np.array([0.01129394, 0.05493144, 0.02615099, 0.00328772, 0.02996045]),
     "sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259,  0.0058,  0.0114,  0.0380, -0.0233]),
     "intfloat/multilingual-e5-large": np.array([0.0098,  0.0045,  0.0066, -0.0354,  0.0070]),
 }
@@ -22,7 +23,7 @@ def test_default_embedding():
         assert embeddings.shape == (2, dim)
 
         canonical_vector = CANONICAL_VECTOR_VALUES[model_desc["model"]]
-        assert np.allclose(embeddings[0, :canonical_vector.shape[0]], canonical_vector, atol=1e-3)
+        assert np.allclose(embeddings[0, :canonical_vector.shape[0]], canonical_vector, atol=1e-3), model_desc["model"]
 
 
 def test_batch_embedding():