Skip to content

Commit

Permalink
Merge pull request kubeagi#74 from 0xff-dev/chromadb
Browse files Browse the repository at this point in the history
feat: add chromadb
  • Loading branch information
bjwswang authored Sep 1, 2023
2 parents 27017f2 + 61013e2 commit 9836b39
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 11 deletions.
11 changes: 7 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ module github.com/kubeagi/arcadia
go 1.20

require (
github.com/amikos-tech/chroma-go v0.0.0-20230809210405-a3fb26bbc4b4
github.com/go-logr/logr v1.2.0
github.com/golang-jwt/jwt v3.2.2+incompatible
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.18.1
github.com/r3labs/sse/v2 v2.10.0
github.com/spf13/cobra v1.4.0
github.com/tmc/langchaingo v0.0.0-20230829032728-c85d3967da08
k8s.io/api v0.24.2
k8s.io/apimachinery v0.24.2
k8s.io/client-go v0.24.2
Expand All @@ -17,7 +19,7 @@ require (
)

require (
cloud.google.com/go/compute v1.19.1 // indirect
cloud.google.com/go/compute v1.20.1 // indirect
cloud.google.com/go/compute/metadata v0.2.3 // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
github.com/Azure/go-autorest/autorest v0.11.18 // indirect
Expand All @@ -30,6 +32,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect
Expand All @@ -56,25 +59,25 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nxadm/tail v1.4.8 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
github.com/prometheus/client_golang v1.12.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.4 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
go.uber.org/zap v1.19.1 // indirect
golang.org/x/crypto v0.12.0 // indirect
golang.org/x/net v0.14.0 // indirect
golang.org/x/oauth2 v0.8.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.11.0 // indirect
golang.org/x/text v0.12.0 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.30.0 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/cenkalti/backoff.v1 v1.1.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
Expand Down
21 changes: 14 additions & 7 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf
cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=
cloud.google.com/go/compute v1.19.1 h1:am86mquDUgjGNWxiGn+5PGLbmgiWXlE/yNWpIpNvuXY=
cloud.google.com/go/compute v1.19.1/go.mod h1:6ylj3a05WF8leseCdIf77NK0g1ey+nj5IKd5/kvShxE=
cloud.google.com/go/compute v1.20.1 h1:6aKEtlUiwEpJzM001l0yFkpXmUVXaN8W+fbkb2AZNbg=
cloud.google.com/go/compute v1.20.1/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM=
cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY=
cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA=
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
Expand Down Expand Up @@ -70,6 +70,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/amikos-tech/chroma-go v0.0.0-20230809210405-a3fb26bbc4b4 h1:ttEvyy/Bo4urvOOedg73ywpq1J3ChoUtGubbka14ptY=
github.com/amikos-tech/chroma-go v0.0.0-20230809210405-a3fb26bbc4b4/go.mod h1:bPy9xmWK59Ix/nteEqIvPfAI0L07di+uZMb+RiYbles=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20210826220005-b48c857c3a0e/go.mod h1:F7bn7fEU90QkQ3tnmaTx3LTKLEDqnwWODIYppRQ5hnY=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
Expand Down Expand Up @@ -121,6 +123,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0=
github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
Expand Down Expand Up @@ -398,6 +402,8 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=
github.com/pkoukk/tiktoken-go v0.1.2 h1:u7PCSBiWJ3nJYoTGShyM9iHXz4dNyYkurwwp+GHtyHY=
github.com/pkoukk/tiktoken-go v0.1.2/go.mod h1:boMWvk9pQCOTx11pgu0DrIdrAKgQzzJKUP6vLXaz7Rw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
Expand Down Expand Up @@ -471,10 +477,11 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/langchaingo v0.0.0-20230829032728-c85d3967da08 h1:ga3HJeF9v5Lyob60oXuasXmeWMwKhygnUnXxYQRj1YI=
github.com/tmc/langchaingo v0.0.0-20230829032728-c85d3967da08/go.mod h1:vCdA1t5qnS5YPkDsznowOziBHFn0Ul11ZqfJ2GOAi0s=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
Expand Down Expand Up @@ -642,8 +649,8 @@ golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ
golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.8.0 h1:6dkIjl3j3LtZ/O3sTgZTMsLKSftL/B8Zgq4huOIIUu8=
golang.org/x/oauth2 v0.8.0/go.mod h1:yr7u4HXZRm1R1kBWqr/xKNqewf0plRYoB7sla+BCIXE=
golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8=
golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down Expand Up @@ -920,8 +927,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/cenkalti/backoff.v1 v1.1.0 h1:Arh75ttbsvlpVA7WtVpH4u9h6Zl46xuptxqLxPiSo4Y=
gopkg.in/cenkalti/backoff.v1 v1.1.0/go.mod h1:J6Vskwqd+OMVJl8C33mmtxTBs2gyzfv7UDAkHu8BrjI=
Expand Down
161 changes: 161 additions & 0 deletions pkg/vectorstores/chromadb/chroma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
Copyright 2023 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package chromadb

import (
"context"
"fmt"
"net/http"

chroma "github.com/amikos-tech/chroma-go"
openapiclient "github.com/amikos-tech/chroma-go/swagger"
"github.com/tmc/langchaingo/schema"
vs "github.com/tmc/langchaingo/vectorstores"
)

// chromadb is the Chroma-backed vector store handed out by NewChromaDB as a
// vs.VectorStore.
type chromadb struct {
// option holds the settings built from the defaults plus the caller's Option values.
option *option
// client is the chroma-go client used for every collection call.
client *chroma.Client
}

// NewChromaDB builds a vector store that talks to a Chroma server.
//
// The store starts from these defaults: host "http://localhost", port 8000,
// text key "text" and distance function chroma.L2. Every Option in opts is
// then applied in order on top of them. The resulting settings are validated
// with verify, and its error is returned unchanged when validation fails.
//
// HTTP traffic goes through http.DefaultClient unless a transport has been
// configured, in which case a dedicated http.Client wrapping that transport
// is used instead.
func NewChromaDB(opts ...Option) (vs.VectorStore, error) {
	store := &chromadb{
		option: &option{
			host:         "http://localhost",
			port:         8000,
			textKey:      "text",
			distanceFunc: chroma.L2,
		},
	}
	for i := range opts {
		opts[i](store.option)
	}

	err := store.verify()
	if err != nil {
		return nil, err
	}

	httpClient := http.DefaultClient
	if rt := store.option.transport; rt != nil {
		httpClient = &http.Client{Transport: rt}
	}

	// The server URL is simply "<host>:<port>"; host is expected to carry the scheme.
	serverURL := fmt.Sprintf("%s:%d", store.option.host, store.option.port)
	cfg := &openapiclient.Configuration{
		Servers: openapiclient.ServerConfigurations{
			{
				URL:         serverURL,
				Description: "chromadb server",
			},
		},
		HTTPClient: httpClient,
	}
	store.client = &chroma.Client{ApiClient: openapiclient.NewAPIClient(cfg)}

	return store, nil
}

// verify checks that the mandatory settings are present. It returns an error
// when the collection name is empty or when no embedder has been configured,
// and nil otherwise.
func (c *chromadb) verify() error {
	if c.option.collectionName == "" {
		// Message previously read "collectioName"; spell the setting correctly.
		return fmt.Errorf("collectionName can't be empty")
	}
	if c.option.embeddr == nil {
		return fmt.Errorf("embedder is empty")
	}

	return nil
}

// getOptions folds the given functional options into a fresh vs.Options
// value, applying them in the order they were passed.
func (c *chromadb) getOptions(options ...vs.Option) vs.Options {
	var merged vs.Options
	for i := range options {
		options[i](&merged)
	}
	return merged
}

// getFilter extracts the "where" and "where document" clauses from
// opts.Filters.
//
// Filters is only honoured when it is a []interface{} of exactly two
// elements, both of which are map[string]interface{}: the first is returned
// as the where clause and the second as the where-document clause. Any other
// shape yields (nil, nil).
func (c *chromadb) getFilter(opts vs.Options) (map[string]interface{}, map[string]interface{}) {
	pair, isSlice := opts.Filters.([]interface{})
	if !isSlice || len(pair) != 2 {
		return nil, nil
	}

	where, ok := pair[0].(map[string]interface{})
	if !ok {
		return nil, nil
	}
	whereDocument, ok := pair[1].(map[string]interface{})
	if !ok {
		return nil, nil
	}
	return where, whereDocument
}

// addDocuments embeds texts with the configured embedder and adds them,
// together with ids and metadatas, to the configured collection.
//
// The collection is requested through CreateCollection on every call, using
// the configured name and distance function. An error is returned when the
// collection call, the embedding step or the final Add fails, or when the
// embedder returns a different number of vectors than there are texts.
// ctx is currently not forwarded to any of the calls made here.
func (c *chromadb) addDocuments(ctx context.Context, texts, ids []string, metadatas []map[string]interface{}) error {
	embedder := &LocalEmbedder{Embedder: c.option.embeddr}

	coll, err := c.client.CreateCollection(
		c.option.collectionName,
		map[string]interface{}{},
		true,
		embedder,
		c.option.distanceFunc,
	)
	if err != nil {
		return err
	}

	vecs, err := embedder.CreateEmbedding(texts)
	if err != nil {
		return err
	}
	if got, want := len(vecs), len(texts); got != want {
		return fmt.Errorf("number of vectors from embedder does not match number of documents")
	}

	_, err = coll.Add(vecs, metadatas, texts, ids)
	return err
}

// AddDocuments embeds docs and stores them in the configured collection.
//
// Each document's ID is its index within docs, formatted as a decimal string.
// Its stored metadata is a copy of the document's Metadata with the page
// content added under the configured text key. The options argument is not
// used.
//
// NOTE(review): IDs restart at "0" on every call, so a second call reuses the
// IDs of the first; confirm how the server treats duplicate IDs.
func (c *chromadb) AddDocuments(ctx context.Context, docs []schema.Document, options ...vs.Option) error {
	n := len(docs)
	texts := make([]string, n)
	ids := make([]string, n)
	metadatas := make([]map[string]interface{}, n)

	for i := range docs {
		texts[i] = docs[i].PageContent
		ids[i] = fmt.Sprintf("%d", i)

		// Copy so the caller's metadata map is never mutated by the text-key insert.
		md := make(map[string]interface{}, len(docs[i].Metadata)+1)
		for key, value := range docs[i].Metadata {
			md[key] = value
		}
		md[c.option.textKey] = texts[i]
		metadatas[i] = md
	}

	return c.addDocuments(ctx, texts, ids, metadatas)
}

// SimilaritySearch queries the configured collection for the numDocuments
// entries closest to query and returns them as schema.Documents.
//
// A filter supplied through options is split by getFilter into the where and
// where-document clauses that are forwarded to the query. Errors from looking
// up the collection or from the query itself are returned unchanged. When the
// query yields no result set, an empty (non-nil) slice is returned. ctx is
// currently not forwarded to the client calls.
func (c *chromadb) SimilaritySearch(ctx context.Context, query string, numDocuments int, options ...vs.Option) ([]schema.Document, error) {
	localEmbedder := &LocalEmbedder{Embedder: c.option.embeddr}
	collection, err := c.client.GetCollection(c.option.collectionName, localEmbedder)
	if err != nil {
		return nil, err
	}

	opts := c.getOptions(options...)
	where, whereDocument := c.getFilter(opts)
	result, err := collection.Query([]string{query}, int32(numDocuments), where, whereDocument, nil)
	if err != nil {
		return nil, err
	}

	// Results are grouped per query text, e.g.
	// {"ids":[["0001","0003"]],"metadatas":[[{...},{...}]],"documents":[["doc1","doc3"]]}.
	// Only one query text is sent, so only group 0 matters; guard against it
	// being absent instead of panicking on the index.
	if len(result.Documents) == 0 {
		return []schema.Document{}, nil
	}

	contents := result.Documents[0]
	documents := make([]schema.Document, len(contents))
	for i, content := range contents {
		doc := schema.Document{PageContent: content}
		// Metadata may be missing or shorter than the document list; attach it
		// only when an entry exists for this position.
		if len(result.Metadatas) > 0 && i < len(result.Metadatas[0]) {
			doc.Metadata = result.Metadatas[0][i]
		}
		documents[i] = doc
	}

	return documents, nil
}
16 changes: 16 additions & 0 deletions pkg/vectorstores/chromadb/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
Copyright 2023 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package chromadb
45 changes: 45 additions & 0 deletions pkg/vectorstores/chromadb/local_embedder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
Copyright 2023 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package chromadb

import (
"context"

"github.com/tmc/langchaingo/embeddings"
)

// LocalEmbedder wraps a langchaingo embeddings.Embedder and adds the
// CreateEmbedding and CreateEmbeddingWithModel methods, which return
// float32 vectors.
type LocalEmbedder struct {
embeddings.Embedder
}

// CreateEmbedding embeds documents with the wrapped Embedder and returns one
// float32 vector per embedded document, in the order the embedder produced
// them. Each component is converted with float32(...). The embedder's error,
// if any, is returned unchanged with a nil result.
//
// The embedder is invoked with context.TODO() because this method's signature
// carries no context.
func (l *LocalEmbedder) CreateEmbedding(documents []string) ([][]float32, error) {
	embedded, err := l.Embedder.EmbedDocuments(context.TODO(), documents)
	if err != nil {
		return nil, err
	}

	out := make([][]float32, 0, len(embedded))
	for _, vec := range embedded {
		converted := make([]float32, len(vec))
		for i, component := range vec {
			converted[i] = float32(component)
		}
		out = append(out, converted)
	}
	return out, nil
}

// CreateEmbeddingWithModel embeds documents exactly as CreateEmbedding does.
// The model argument is ignored: the wrapped Embedder decides which model is
// used.
func (l *LocalEmbedder) CreateEmbeddingWithModel(documents []string, model string) ([][]float32, error) {
	vectors, err := l.CreateEmbedding(documents)
	return vectors, err
}
Loading

0 comments on commit 9836b39

Please sign in to comment.