Skip to content

Commit

Permalink
Merge pull request #17 from tembo-io/fix/missingEmbeds
Browse files Browse the repository at this point in the history
add index
  • Loading branch information
ChuckHend authored Oct 24, 2023
2 parents 0f3727c + e62418a commit 019fa00
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "vectorize"
version = "0.1.2"
version = "0.2.0"
edition = "2021"
publish = false

Expand Down
2 changes: 1 addition & 1 deletion Trunk.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ description = "The simplest implementation of LLM-backed vector search on Postgr
homepage = "https://github.com/tembo-io/pg_vectorize"
documentation = "https://github.com/tembo-io/pg_vectorize"
categories = ["orchestration", "machine_learning"]
version = "0.1.2"
version = "0.2.0"

[build]
postgres_version = "15"
Expand Down
4 changes: 3 additions & 1 deletion src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ fn table(
);

let ran: Result<_, spi::Error> = Spi::connect(|mut c| {
let _r = c.update(&init_embed_q, None, None)?;
for q in init_embed_q {
let _r = c.update(&q, None, None)?;
}
Ok(())
});
if let Err(e) = ran {
Expand Down
20 changes: 17 additions & 3 deletions src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pub fn init_embedding_table_query(
transformer: &types::Transformer,
search_alg: &types::SimilarityAlg,
transform_method: &TableMethod,
) -> String {
) -> Vec<String> {
// TODO: when adding support for other models, add the output dimension to the transformer attributes
// so that it can be read from there instead of being hard-coded below.
// Currently only the text-embedding-ada-002 embedding model is supported (output dimension 1536).
Expand All @@ -80,8 +80,15 @@ pub fn init_embedding_table_query(
(types::Transformer::openai, types::SimilarityAlg::pgv_cosine_similarity) => "vector(1536)",
};
match transform_method {
TableMethod::append => append_embedding_column(job_name, schema, table, col_type),
TableMethod::join => create_embedding_table(job_name, col_type),
TableMethod::append => {
vec![
append_embedding_column(job_name, schema, table, col_type),
create_hnsw_cosine_index(job_name, schema, table),
]
}
TableMethod::join => {
vec![create_embedding_table(job_name, col_type)]
}
}
}

Expand All @@ -97,6 +104,13 @@ fn create_embedding_table(job_name: &str, col_type: &str) -> String {
)
}

/// Builds the SQL statement that creates an HNSW index using `vector_cosine_ops`
/// on a job's embeddings column.
///
/// The index is named `{job_name}_idx`, targets `{schema}.{table}`, and covers the
/// `{job_name}_embeddings` column. The statement uses `IF NOT EXISTS`.
///
/// All three arguments are interpolated into the SQL text verbatim, with no quoting
/// or escaping applied here.
fn create_hnsw_cosine_index(job_name: &str, schema: &str, table: &str) -> String {
    let index_name = format!("{job_name}_idx");
    let qualified_table = format!("{schema}.{table}");
    let embeddings_column = format!("{job_name}_embeddings");
    // The statement ends with a newline after the terminating semicolon.
    format!(
        "CREATE INDEX IF NOT EXISTS {index_name} ON {qualified_table} USING hnsw ({embeddings_column} vector_cosine_ops);\n"
    )
}

fn append_embedding_column(job_name: &str, schema: &str, table: &str, col_type: &str) -> String {
// TODO: when adding support for other models, add the output dimension to the transformer attributes
// so that they can be read here, not hard-coded here below
Expand Down
1 change: 1 addition & 0 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub fn cosine_similarity_search(
1 - ({project}_embeddings <=> '{emb}'::vector) AS cosine_similarity,
*
FROM {schema}.{table}
WHERE {project}_updated_at is NOT NULL
ORDER BY cosine_similarity DESC
LIMIT {num_results};
"
Expand Down

0 comments on commit 019fa00

Please sign in to comment.