From e62418abfde43b4c540ba3ace791e867e48c1fbc Mon Sep 17 00:00:00 2001
From: Adam Hendel <15756360+ChuckHend@users.noreply.github.com>
Date: Mon, 23 Oct 2023 19:50:31 -0500
Subject: [PATCH] add HNSW cosine index on appended embeddings column

---
 Cargo.toml    |  2 +-
 Trunk.toml    |  2 +-
 src/api.rs    |  4 +++-
 src/init.rs   | 20 +++++++++++++++++---
 src/search.rs |  1 +
 5 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 23d2b5e..a8ae3a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectorize"
-version = "0.1.2"
+version = "0.2.0"
 edition = "2021"
 publish = false
 
diff --git a/Trunk.toml b/Trunk.toml
index b8006b5..ab131ec 100644
--- a/Trunk.toml
+++ b/Trunk.toml
@@ -6,7 +6,7 @@ description = "The simplest implementation of LLM-backed vector search on Postgr
 homepage = "https://github.com/tembo-io/pg_vectorize"
 documentation = "https://github.com/tembo-io/pg_vectorize"
 categories = ["orchestration", "machine_learning"]
-version = "0.1.2"
+version = "0.2.0"
 
 [build]
 postgres_version = "15"
diff --git a/src/api.rs b/src/api.rs
index a3677ac..52b3d9c 100644
--- a/src/api.rs
+++ b/src/api.rs
@@ -97,7 +97,9 @@ fn table(
     );
 
     let ran: Result<_, spi::Error> = Spi::connect(|mut c| {
-        let _r = c.update(&init_embed_q, None, None)?;
+        for q in init_embed_q {
+            let _r = c.update(&q, None, None)?;
+        }
         Ok(())
     });
     if let Err(e) = ran {
diff --git a/src/init.rs b/src/init.rs
index c7e9d2f..f320dee 100644
--- a/src/init.rs
+++ b/src/init.rs
@@ -65,7 +65,7 @@ pub fn init_embedding_table_query(
     transformer: &types::Transformer,
     search_alg: &types::SimilarityAlg,
     transform_method: &TableMethod,
-) -> String {
+) -> Vec<String> {
     // TODO: when adding support for other models, add the output dimension to the transformer attributes
     // so that they can be read here, not hard-coded here below
     // currently only supports the text-embedding-ada-002 embedding model - output dim 1536
@@ -80,8 +80,15 @@ pub fn init_embedding_table_query(
         (types::Transformer::openai, types::SimilarityAlg::pgv_cosine_similarity) => "vector(1536)",
     };
     match transform_method {
-        TableMethod::append => append_embedding_column(job_name, schema, table, col_type),
-        TableMethod::join => create_embedding_table(job_name, col_type),
+        TableMethod::append => {
+            vec![
+                append_embedding_column(job_name, schema, table, col_type),
+                create_hnsw_cosine_index(job_name, schema, table),
+            ]
+        }
+        TableMethod::join => {
+            vec![create_embedding_table(job_name, col_type)]
+        }
     }
 }
 
@@ -97,6 +104,13 @@ fn create_embedding_table(job_name: &str, col_type: &str) -> String {
     )
 }
 
+fn create_hnsw_cosine_index(job_name: &str, schema: &str, table: &str) -> String {
+    format!( // DDL: HNSW index `{job_name}_idx` on `{schema}.{table}` over `{job_name}_embeddings`
+        "CREATE INDEX IF NOT EXISTS {job_name}_idx ON {schema}.{table} USING hnsw ({job_name}_embeddings vector_cosine_ops);
+        ",
+    ) // names are interpolated unquoted; `IF NOT EXISTS` avoids an error if the index name is taken
+}
+
 fn append_embedding_column(job_name: &str, schema: &str, table: &str, col_type: &str) -> String {
     // TODO: when adding support for other models, add the output dimension to the transformer attributes
     // so that they can be read here, not hard-coded here below
diff --git a/src/search.rs b/src/search.rs
index 44aca68..12a3b23 100644
--- a/src/search.rs
+++ b/src/search.rs
@@ -15,6 +15,7 @@ pub fn cosine_similarity_search(
         1 - ({project}_embeddings <=> '{emb}'::vector) AS cosine_similarity,
         *
     FROM {schema}.{table}
+    WHERE {project}_updated_at is NOT NULL
     ORDER BY cosine_similarity DESC
     LIMIT {num_results};
     "