From a250dff400cfb6f30368819b9d44119a143bc348 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= <zequany33@gmail.com>
Date: Sat, 14 Sep 2024 10:38:26 +0800
Subject: [PATCH 1/3] add vector index part in other document
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: “EricZequan” <zequany33@gmail.com>
---
 vector-search-data-types.md                |  9 +++++++--
 vector-search-integrate-with-django-orm.md | 19 ++++++++++++++++++-
 vector-search-integrate-with-peewee.md     | 19 ++++++++++++++++++-
 vector-search-integrate-with-sqlalchemy.md | 14 ++++++++++++++
 vector-search-limitations.md               |  1 +
 5 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/vector-search-data-types.md b/vector-search-data-types.md
index 146fa67db45d..97aab2c602e8 100644
--- a/vector-search-data-types.md
+++ b/vector-search-data-types.md
@@ -14,6 +14,7 @@ summary: 本文介绍 TiDB 的向量数据类型。
 
 与使用 [`JSON`](/data-type-json.md) 类型相比，使用向量类型具有以下优势：
 
+- 支持向量索引。可以通过构建[向量搜索索引](/vector-search-index.md)加速查询。
 - 可指定维度。指定一个固定维度后，不符合维度的数据将被阻止写入到表中。
 - 存储格式更优。向量数据类型针对向量数据进行了特别优化，在空间利用和性能效率上都优于 `JSON` 类型。
 
@@ -52,7 +53,8 @@ ERROR 1105 (HY000): Invalid vector text: [5, ]
 ERROR 1105 (HY000): vector has 2 dimensions, does not fit VECTOR(3)
 ```
 
-可参阅[向量函数与操作符](/vector-search-functions-and-operators.md)了解向量数据类型支持的所有函数和操作符。
+可参阅 [向量函数与操作符](/vector-search-functions-and-operators.md) 了解向量数据类型支持的所有函数和操作符。
+可参阅 [向量搜索索引](/vector-search-index.md) 了解向量搜索索引的信息。
 
 ## 混合存储不同维度的向量
 
@@ -68,6 +70,8 @@ INSERT INTO vector_table VALUES (1, '[0.3, 0.5, -0.1]'); -- 3 dimensions vector,
 INSERT INTO vector_table VALUES (2, '[0.3, 0.5]');       -- 2 dimensions vector, OK
 ```
 
+但是，我们不能为存储了不同维度的向量列构建 [向量搜索索引](/vector-search-index.md)，因为向量距离只能在具有相同维度的向量之间计算。
+
 ## 比较
 
 [比较运算符](/vector-search-functions-and-operators.md#扩展的内置函数和运算符) 如 `=`, `!=`, `<`, `>`, `<=` 和 `>=` 等都能正常对向量数据进行比较。可参阅[向量函数与操作符](/vector-search-functions-and-operators.md#扩展的内置函数和运算符)了解向量数据类型支持的所有函数和操作符。
@@ -239,4 +243,5 @@ ERROR 1105 (HY000): vectors have different dimensions: 1 and 3
 
 ## 另请参阅
 
-- [向量函数和操作符](/vector-search-functions-and-operators.md)
\ No newline at end of file
+- [向量函数和操作符](/vector-search-functions-and-operators.md)
+- [向量搜索索引](/vector-search-index.md)
\ No newline at end of file
diff --git a/vector-search-integrate-with-django-orm.md b/vector-search-integrate-with-django-orm.md
index 9c7ce2f86691..c279a9bef2d0 100644
--- a/vector-search-integrate-with-django-orm.md
+++ b/vector-search-integrate-with-django-orm.md
@@ -224,6 +224,22 @@ Document.objects.create(content="fish", embedding=[1, 2, 4])
 Document.objects.create(content="tree", embedding=[1, 0, 0])
 ```
 
+#### 用索引定义优化的向量列
+
+定义三维向量列，并使用 [向量搜索索引 (HNSW 索引)](/vector-search-index.md) 对其进行优化。
+
+```python
+class DocumentWithIndex(models.Model):
+   content = models.TextField()
+   # Note:
+   #   - Using comment to add hnsw index is a temporary solution. In the future it will use `CREATE INDEX` syntax.
+   #   - Currently the HNSW index cannot be changed after the table has been created.
+   #   - Only Django >= 4.2 supports `db_comment`.
+   embedding = VectorField(dimensions=3, db_comment="VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")
+```
+
+TiDB 将使用该索引来加速基于余弦距离函数的向量搜索查询。
+
 ### 搜索近邻向量
 
 TiDB 向量支持以下距离函数：
@@ -253,4 +269,5 @@ results = Document.objects.annotate(
 
 ## 另请参阅
 
-- [向量数据类型](/vector-search-data-types.md)
\ No newline at end of file
+- [向量数据类型](/vector-search-data-types.md)
+- [向量搜索索引](/vector-search-index.md)
\ No newline at end of file
diff --git a/vector-search-integrate-with-peewee.md b/vector-search-integrate-with-peewee.md
index d06e1e52f38d..3a3fc3d23d5e 100644
--- a/vector-search-integrate-with-peewee.md
+++ b/vector-search-integrate-with-peewee.md
@@ -223,6 +223,22 @@ Document.create(content='fish', embedding=[1, 2, 4])
 Document.create(content='tree', embedding=[1, 0, 0])
 ```
 
+#### 用索引定义优化的向量列
+
+定义三维向量列，并使用 [向量搜索索引](/vector-search-index.md) (HNSW 索引) 对其进行优化。
+
+```python
+class DocumentWithIndex(Model):
+    class Meta:
+        database = db
+        table_name = 'peewee_demo_documents_with_index'
+
+    content = TextField()
+    embedding = VectorField(3, constraints=[SQL("VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")])
+```
+
+TiDB 将使用该索引来加速基于余弦距离函数的向量搜索查询。
+
 ### 搜索近邻向量
 
 可以选择使用余弦距离 (`CosineDistance`) 函数，查询与向量 `[1, 2, 3]` 语义最接近的前 3 个 `document`。
@@ -244,4 +260,5 @@ results = Document.select(Document, distance).where(distance_expression < 0.2).o
 
 ## 另请参阅
 
-- [向量数据类型](/vector-search-data-types.md)
\ No newline at end of file
+- [向量数据类型](/vector-search-data-types.md)
+- [向量搜索索引](/vector-search-index.md)
\ No newline at end of file
diff --git a/vector-search-integrate-with-sqlalchemy.md b/vector-search-integrate-with-sqlalchemy.md
index 5650b5bb2cb5..6094302d28e1 100644
--- a/vector-search-integrate-with-sqlalchemy.md
+++ b/vector-search-integrate-with-sqlalchemy.md
@@ -186,6 +186,20 @@ with Session(engine) as session:
    session.commit()
 ```
 
+#### 用索引定义优化的向量列
+
+定义三维向量列，并使用 [向量搜索索引](/vector-search-index.md) (HNSW 索引) 对其进行优化。
+
+```python
+class DocumentWithIndex(Base):
+    __tablename__ = 'sqlalchemy_demo_documents_with_index'
+    id = Column(Integer, primary_key=True)
+    content = Column(Text)
+    embedding = Column(VectorType(3), comment="VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")
+```
+
+TiDB 将使用该索引来加速基于余弦距离函数的矢量搜索查询。
+
 ### 搜索近邻向量
 
 可以选择使用余弦距离 (`CosineDistance`) 函数，查询与向量 `[1, 2, 3]` 语义最接近的前 3 个 `document`。
diff --git a/vector-search-limitations.md b/vector-search-limitations.md
index 5c0af31dc65e..838cf8216687 100644
--- a/vector-search-limitations.md
+++ b/vector-search-limitations.md
@@ -9,6 +9,7 @@ summary: 了解 TiDB 向量搜索功能的限制。
 
 - 向量最大支持 16383 维。
 - 向量数据中不支持 `NaN`、`Infinity` 和 `-Infinity` 浮点数。
+- 创建 [向量搜索索引](/vector-search-index.md) 时只支持余弦距离和 L2 距离。
 - 目前，向量数据类型不支持存储双精度浮点数（该功能计划在未来的版本中支持）。当向 TiDB 中的向量字段插入或存储数据时，如果这些数据的类型是双精度浮点数，TiDB 会将这些双精度浮点数自动转换为单精度浮点数。
 
 ## 反馈

From 634c602580107c14eca2c63c6124e3bff8403eae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CEricZequan=E2=80=9D?= <zequany33@gmail.com>
Date: Sat, 14 Sep 2024 10:44:03 +0800
Subject: [PATCH 2/3] modify index name when create vector index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: “EricZequan” <zequany33@gmail.com>
---
 vector-search-index.md                     | 4 ++--
 vector-search-integrate-with-django-orm.md | 2 +-
 vector-search-integrate-with-peewee.md     | 2 +-
 vector-search-integrate-with-sqlalchemy.md | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vector-search-index.md b/vector-search-index.md
index 51c11ed96922..b250805af730 100644
--- a/vector-search-index.md
+++ b/vector-search-index.md
@@ -26,7 +26,7 @@ TiDB 目前支持以下向量搜索索引算法：
         id       INT PRIMARY KEY,
         data     VECTOR(5),
         data64   VECTOR64(10),
-        VECTOR INDEX data USING HNSW ((VEC_COSINE_DISTANCE(data)))
+        VECTOR INDEX idx_data USING HNSW ((VEC_COSINE_DISTANCE(data)))
     );
     ```
 
@@ -122,7 +122,7 @@ CREATE TABLE docs (
     ver VARCHAR(10),
     doc TEXT,
     embedding VECTOR(3),
-    VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))
+    VECTOR INDEX idx_embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))
 ) PARTITION BY LIST COLUMNS (ver) (
     PARTITION p_v1_0 VALUES IN ('v1.0'),
     PARTITION p_v1_1 VALUES IN ('v1.1'),
diff --git a/vector-search-integrate-with-django-orm.md b/vector-search-integrate-with-django-orm.md
index c279a9bef2d0..02a8ac58d029 100644
--- a/vector-search-integrate-with-django-orm.md
+++ b/vector-search-integrate-with-django-orm.md
@@ -235,7 +235,7 @@ class DocumentWithIndex(models.Model):
    #   - Using comment to add hnsw index is a temporary solution. In the future it will use `CREATE INDEX` syntax.
    #   - Currently the HNSW index cannot be changed after the table has been created.
    #   - Only Django >= 4.2 supports `db_comment`.
-   embedding = VectorField(dimensions=3, db_comment="VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")
+   embedding = VectorField(dimensions=3, db_comment="VECTOR INDEX idx_embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")
 ```
 
 TiDB 将使用该索引来加速基于余弦距离函数的向量搜索查询。
diff --git a/vector-search-integrate-with-peewee.md b/vector-search-integrate-with-peewee.md
index 3a3fc3d23d5e..611277c0d0b9 100644
--- a/vector-search-integrate-with-peewee.md
+++ b/vector-search-integrate-with-peewee.md
@@ -234,7 +234,7 @@ class DocumentWithIndex(Model):
         table_name = 'peewee_demo_documents_with_index'
 
     content = TextField()
-    embedding = VectorField(3, constraints=[SQL("VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")])
+    embedding = VectorField(3, constraints=[SQL("VECTOR INDEX idx_embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")])
 ```
 
 TiDB 将使用该索引来加速基于余弦距离函数的向量搜索查询。
diff --git a/vector-search-integrate-with-sqlalchemy.md b/vector-search-integrate-with-sqlalchemy.md
index 6094302d28e1..484e52b62292 100644
--- a/vector-search-integrate-with-sqlalchemy.md
+++ b/vector-search-integrate-with-sqlalchemy.md
@@ -195,7 +195,7 @@ class DocumentWithIndex(Base):
     __tablename__ = 'sqlalchemy_demo_documents_with_index'
     id = Column(Integer, primary_key=True)
     content = Column(Text)
-    embedding = Column(VectorType(3), comment="VECTOR INDEX embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")
+    embedding = Column(VectorType(3), comment="VECTOR INDEX idx_embedding USING HNSW ((VEC_COSINE_DISTANCE(embedding)))")
 ```
 
 TiDB 将使用该索引来加速基于余弦距离函数的矢量搜索查询。

From 4b54e6dd86390494d44627e6006ce63d9aa76059 Mon Sep 17 00:00:00 2001
From: EricZequan <110292382+EricZequan@users.noreply.github.com>
Date: Sat, 14 Sep 2024 11:04:20 +0800
Subject: [PATCH 3/3] Update vector-search-improve-performance.md

---
 vector-search-improve-performance.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vector-search-improve-performance.md b/vector-search-improve-performance.md
index 6c9cc96a171b..6928a79f7ae6 100644
--- a/vector-search-improve-performance.md
+++ b/vector-search-improve-performance.md
@@ -17,7 +17,7 @@ summary: 了解优化 TiDB 向量搜索性能的最佳实践。
 
 ## 减少向量维数或缩短嵌入时间
 
-随着向量大小的增加，向量搜索索引和查询的计算复杂度会显著增加，因为这意味着要进行更多的浮点数比较运算。
+随着向量维数的增加，向量搜索索引和查询的计算复杂度会显著增加，因为这意味着要进行更多的浮点数比较运算。
 
 为了优化性能，可以考虑尽可能地减少向量的维数。这通常需要切换到另一种嵌入模型。在切换模型时，你需要确保改变嵌入模型对向量查询准确性的影响。