diff --git a/api/base/v1alpha1/knowledgebase.go b/api/base/v1alpha1/knowledgebase.go index de685effa..a79b51e71 100644 --- a/api/base/v1alpha1/knowledgebase.go +++ b/api/base/v1alpha1/knowledgebase.go @@ -5,6 +5,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + // UpdateSourceFileAnnotationKey is the key of the update source file annotation + UpdateSourceFileAnnotationKey = Group + "/update-source-file-time" +) + func (kb *KnowledgeBase) VectorStoreCollectionName() string { return kb.Namespace + "_" + kb.Name } diff --git a/controllers/base/knowledgebase_controller.go b/controllers/base/knowledgebase_controller.go index d14da6d66..daa97b58e 100644 --- a/controllers/base/knowledgebase_controller.go +++ b/controllers/base/knowledgebase_controller.go @@ -155,14 +155,27 @@ func (r *KnowledgeBaseReconciler) SetupWithManager(mgr ctrl.Manager) error { } func (r *KnowledgeBaseReconciler) reconcile(ctx context.Context, log logr.Logger, kb *arcadiav1alpha1.KnowledgeBase) (*arcadiav1alpha1.KnowledgeBase, ctrl.Result, error) { - // Observe generation change - if kb.Status.ObservedGeneration != kb.Generation { - kb.Status.ObservedGeneration = kb.Generation + // Observe generation change or manual update + if kb.Status.ObservedGeneration != kb.Generation || kb.Annotations[arcadiav1alpha1.UpdateSourceFileAnnotationKey] != "" { + if kb.Status.ObservedGeneration != kb.Generation { + log.Info("Generation changed") + kb.Status.ObservedGeneration = kb.Generation + } kb = r.setCondition(kb, kb.InitCondition()) if updateStatusErr := r.patchStatus(ctx, kb); updateStatusErr != nil { log.Error(updateStatusErr, "unable to update status after generation update") return kb, ctrl.Result{Requeue: true}, updateStatusErr } + if kb.Annotations[arcadiav1alpha1.UpdateSourceFileAnnotationKey] != "" { + log.Info("Manual update") + kbNew := kb.DeepCopy() + delete(kbNew.Annotations, arcadiav1alpha1.UpdateSourceFileAnnotationKey) + err := r.Patch(ctx, kbNew, client.MergeFrom(kb)) 
+ if err != nil { + return kb, ctrl.Result{Requeue: true}, err + } + } + r.cleanupHasHandledSuccessPath(kb) } if kb.Status.IsReady() { @@ -461,35 +474,17 @@ func (r *KnowledgeBaseReconciler) handleFile(ctx context.Context, log logr.Logge for i, doc := range documents { log.V(5).Info(fmt.Sprintf("document[%d]: embedding:%s, metadata:%v", i, doc.PageContent, doc.Metadata)) } - s, finish, err := vectorstore.NewVectorStore(ctx, store, em, kb.VectorStoreCollectionName(), r.Client, nil) - if err != nil { - return err - } - log.Info("handle file: add documents to embedder") - if _, err = s.AddDocuments(ctx, documents); err != nil { - return err - } - if finish != nil { - finish() - } - log.Info("handle file succeeded") - return nil + return vectorstore.AddDocuments(ctx, log, store, em, kb.VectorStoreCollectionName(), r.Client, nil, documents) } func (r *KnowledgeBaseReconciler) reconcileDelete(ctx context.Context, log logr.Logger, kb *arcadiav1alpha1.KnowledgeBase) { - r.mu.Lock() - for _, fg := range kb.Spec.FileGroups { - for _, path := range fg.Paths { - delete(r.HasHandledSuccessPath, r.hasHandledPathKey(kb, fg, path)) - } - } - r.mu.Unlock() + r.cleanupHasHandledSuccessPath(kb) vectorStore := &arcadiav1alpha1.VectorStore{} if err := r.Get(ctx, types.NamespacedName{Name: kb.Spec.VectorStore.Name, Namespace: kb.Spec.VectorStore.GetNamespace(kb.GetNamespace())}, vectorStore); err != nil { log.Error(err, "reconcile delete: get vector store error, may leave garbage data") return } - _ = vectorstore.RemoveCollection(ctx, log, vectorStore, kb.VectorStoreCollectionName()) + _ = vectorstore.RemoveCollection(ctx, log, vectorStore, kb.VectorStoreCollectionName(), r.Client, nil) } func (r *KnowledgeBaseReconciler) hasHandledPathKey(kb *arcadiav1alpha1.KnowledgeBase, filegroup arcadiav1alpha1.FileGroup, path string) string { @@ -499,3 +494,13 @@ func (r *KnowledgeBaseReconciler) hasHandledPathKey(kb *arcadiav1alpha1.Knowledg } return kb.Name + "/" + kb.Namespace + "/" + 
sourceName + "/" + path } + +func (r *KnowledgeBaseReconciler) cleanupHasHandledSuccessPath(kb *arcadiav1alpha1.KnowledgeBase) { + r.mu.Lock() + for _, fg := range kb.Spec.FileGroups { + for _, path := range fg.Paths { + delete(r.HasHandledSuccessPath, r.hasHandledPathKey(kb, fg, path)) + } + } + r.mu.Unlock() +} diff --git a/deploy/charts/arcadia/templates/config.yaml b/deploy/charts/arcadia/templates/config.yaml index fe542f7a5..e2919746a 100644 --- a/deploy/charts/arcadia/templates/config.yaml +++ b/deploy/charts/arcadia/templates/config.yaml @@ -1,4 +1,3 @@ -{{- if .Values.postgresql.enabled }} apiVersion: v1 data: config: | @@ -25,7 +24,12 @@ data: vectorStore: apiGroup: arcadia.kubeagi.k8s.com.cn/v1alpha1 kind: VectorStore +{{- if and (.Values.chromadb.enabled) (eq .Values.global.defaultVectorStoreType "chroma") }} name: '{{ .Release.Name }}-vectorstore' +{{- end }} +{{- if and (.Values.postgresql.enabled) (eq .Values.global.defaultVectorStoreType "pgvector") }} + name: '{{ .Release.Name }}-pgvector-vectorstore' +{{- end }} namespace: '{{ .Release.Namespace }}' #streamlit: @@ -36,16 +40,17 @@ data: dataprocess: | llm: qa_retry_count: {{ .Values.dataprocess.config.llm.qa_retry_count }} +{{- if .Values.postgresql.enabled }} postgresql: host: {{ .Release.Name }}-postgresql.{{ .Release.Namespace }}.svc.cluster.local port: {{ .Values.postgresql.containerPorts.postgresql }} user: {{ .Values.postgresql.global.postgresql.auth.username }} password: {{ .Values.postgresql.global.postgresql.auth.password }} database: {{ .Values.postgresql.global.postgresql.auth.database }} +{{- end }} kind: ConfigMap metadata: labels: control-plane: {{ .Release.Name }}-arcadia name: {{ .Release.Name }}-config namespace: {{ .Release.Namespace }} -{{- end }} \ No newline at end of file diff --git a/deploy/charts/arcadia/templates/post-vectorstore.yaml b/deploy/charts/arcadia/templates/post-vectorstore.yaml index 7d3905e0f..9beb3531f 100644 --- 
a/deploy/charts/arcadia/templates/post-vectorstore.yaml +++ b/deploy/charts/arcadia/templates/post-vectorstore.yaml @@ -1,3 +1,4 @@ +{{- if .Values.chromadb.enabled }} apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 kind: VectorStore metadata: @@ -13,6 +14,7 @@ spec: url: 'http://{{ .Release.Name }}-chromadb.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.chromadb.chromadb.serverHttpPort }}' chroma: distanceFunction: cosine +{{- end }} {{- if .Values.postgresql.enabled }} --- diff --git a/deploy/charts/arcadia/values.yaml b/deploy/charts/arcadia/values.yaml index 504bfb628..38387ddc9 100644 --- a/deploy/charts/arcadia/values.yaml +++ b/deploy/charts/arcadia/values.yaml @@ -1,6 +1,10 @@ global: oss: bucket: &default-oss-bucket "arcadia" + ## @param global.defaultVectorStoreType Defines the default vector database type, currently `chroma` and `pgvector` are available + ## When the option is `chroma`, it needs `chromadb.enabled` to be `true` as well to work. + ## When the option is `pgvector`, it needs `postgresql.enabled` to be `true` as well to work. + defaultVectorStoreType: pgvector # @section controller is used as the core controller for arcadia # @param image Image to be used diff --git a/pkg/vectorstore/pgvector.go b/pkg/vectorstore/pgvector.go new file mode 100644 index 000000000..4c928a683 --- /dev/null +++ b/pkg/vectorstore/pgvector.go @@ -0,0 +1,176 @@ +/* +Copyright 2024 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package vectorstore + +import ( + "context" + "fmt" + "reflect" + "strings" + + "github.com/jackc/pgx/v5" + "github.com/tmc/langchaingo/embeddings" + "github.com/tmc/langchaingo/llms/openai" + lanchaingoschema "github.com/tmc/langchaingo/schema" + "github.com/tmc/langchaingo/vectorstores" + "github.com/tmc/langchaingo/vectorstores/pgvector" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + + arcadiav1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1" + "github.com/kubeagi/arcadia/pkg/datasource" + "github.com/kubeagi/arcadia/pkg/utils" +) + +var _ vectorstores.VectorStore = (*PGVectorStore)(nil) + +type PGVectorStore struct { + *pgx.Conn + pgvector.Store + *arcadiav1alpha1.PGVector +} + +func NewPGVectorStore(ctx context.Context, vs *arcadiav1alpha1.VectorStore, c client.Client, dc dynamic.Interface, embedder embeddings.Embedder, collectionName string) (v *PGVectorStore, finish func(), err error) { + v = &PGVectorStore{PGVector: vs.Spec.PGVector} + ops := []pgvector.Option{ + pgvector.WithPreDeleteCollection(vs.Spec.PGVector.PreDeleteCollection), + } + if vs.Spec.PGVector.CollectionTableName != "" { + ops = append(ops, pgvector.WithCollectionTableName(vs.Spec.PGVector.CollectionTableName)) + } else { + v.PGVector.CollectionTableName = pgvector.DefaultCollectionStoreTableName + } + if vs.Spec.PGVector.EmbeddingTableName != "" { + ops = append(ops, pgvector.WithEmbeddingTableName(vs.Spec.PGVector.EmbeddingTableName)) + } else { + v.PGVector.EmbeddingTableName = pgvector.DefaultEmbeddingStoreTableName + } + if ref := vs.Spec.PGVector.DataSourceRef; ref != nil { + if err := utils.ValidateClient(c, dc); err != nil { + return nil, nil, err + } + ds := &arcadiav1alpha1.Datasource{} + if c != nil { + if err := c.Get(ctx, types.NamespacedName{Name: ref.Name, 
Namespace: ref.GetNamespace(vs.GetNamespace())}, ds); err != nil { + return nil, nil, err + } + } else { + obj, err := dc.Resource(schema.GroupVersionResource{Group: "arcadia.kubeagi.k8s.com.cn", Version: "v1alpha1", Resource: "datasources"}). + Namespace(ref.GetNamespace(vs.GetNamespace())).Get(ctx, ref.Name, metav1.GetOptions{}) + if err != nil { + return nil, nil, err + } + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), ds) + if err != nil { + return nil, nil, err + } + } + vs.Spec.Endpoint = ds.Spec.Endpoint.DeepCopy() + pool, err := datasource.GetPostgreSQLPool(ctx, c, dc, ds) + if err != nil { + return nil, nil, err + } + conn, err := pool.Acquire(ctx) + if err != nil { + return nil, nil, err + } + klog.V(5).Info("acquire pg conn from pool") + finish = func() { + if conn != nil { + conn.Release() + klog.V(5).Info("release pg conn to pool") + } + } + v.Conn = conn.Conn() + ops = append(ops, pgvector.WithConn(v.Conn)) + } else { + conn, err := pgx.Connect(ctx, vs.Spec.Endpoint.URL) + if err != nil { + return nil, nil, err + } + v.Conn = conn + ops = append(ops, pgvector.WithConn(conn)) + } + if embedder != nil { + ops = append(ops, pgvector.WithEmbedder(embedder)) + } else { + llm, _ := openai.New() + embedder, _ = embeddings.NewEmbedder(llm) + } + ops = append(ops, pgvector.WithEmbedder(embedder)) + if collectionName != "" { + ops = append(ops, pgvector.WithCollectionName(collectionName)) + v.PGVector.CollectionName = collectionName + } else { + ops = append(ops, pgvector.WithCollectionName(vs.Spec.PGVector.CollectionName)) + } + store, err := pgvector.New(ctx, ops...) 
+ if err != nil { + return nil, nil, err + } + v.Store = store + return v, finish, nil +} + +// RemoveExist remove exist document from pgvector +// Note: it is currently assumed that the embedder of a knowledge base is constant that means the result of embedding a fixed document is fixed, +// disregarding the case where the embedder changes (and if it does, a lot of processing will need to be done in many places, not just here) +func (s *PGVectorStore) RemoveExist(ctx context.Context, document []lanchaingoschema.Document) (doc []lanchaingoschema.Document, err error) { + // get collection_uuid from collection_table, if null, means no exits + collectionUUID := "" + sql := fmt.Sprintf(`SELECT uuid FROM %s WHERE name = $1 ORDER BY name limit 1`, s.PGVector.CollectionTableName) + err = s.Conn.QueryRow(ctx, sql, s.PGVector.CollectionName).Scan(&collectionUUID) + if collectionUUID == "" { + return document, err + } + in := make([]string, 0) + for _, d := range document { + in = append(in, d.PageContent) + } + sql = fmt.Sprintf(`SELECT document, cmetadata FROM %s WHERE collection_id = $1 AND document in ('%s')`, s.PGVector.EmbeddingTableName, strings.Join(in, "', '")) + rows, err := s.Conn.Query(ctx, sql, collectionUUID) + if err != nil { + return nil, err + } + res := make(map[string]lanchaingoschema.Document, 0) + for rows.Next() { + doc := lanchaingoschema.Document{} + if err := rows.Scan(&doc.PageContent, &doc.Metadata); err != nil { + return nil, err + } + res[doc.PageContent] = doc + } + if len(res) == 0 { + return document, nil + } + if len(res) == len(document) { + return nil, nil + } + doc = make([]lanchaingoschema.Document, 0, len(document)) + for _, d := range document { + has, ok := res[d.PageContent] + if !ok || !reflect.DeepEqual(has.Metadata, d.Metadata) { + doc = append(doc, d) + } + } + return doc, nil +} diff --git a/pkg/vectorstore/vectorstore.go b/pkg/vectorstore/vectorstore.go index 4aaaa368f..0bc329709 100644 --- a/pkg/vectorstore/vectorstore.go +++ 
b/pkg/vectorstore/vectorstore.go @@ -22,21 +22,13 @@ import ( "github.com/go-logr/logr" "github.com/tmc/langchaingo/embeddings" - "github.com/tmc/langchaingo/llms/openai" + lanchaingoschema "github.com/tmc/langchaingo/schema" "github.com/tmc/langchaingo/vectorstores" "github.com/tmc/langchaingo/vectorstores/chroma" - "github.com/tmc/langchaingo/vectorstores/pgvector" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/dynamic" - "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" arcadiav1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1" - "github.com/kubeagi/arcadia/pkg/datasource" - "github.com/kubeagi/arcadia/pkg/utils" ) var ( @@ -60,68 +52,7 @@ func NewVectorStore(ctx context.Context, vs *arcadiav1alpha1.VectorStore, embedd } v, err = chroma.New(ops...) case arcadiav1alpha1.VectorStoreTypePGVector: - ops := []pgvector.Option{ - pgvector.WithPreDeleteCollection(vs.Spec.PGVector.PreDeleteCollection), - } - if vs.Spec.PGVector.CollectionTableName != "" { - ops = append(ops, pgvector.WithCollectionTableName(vs.Spec.PGVector.CollectionTableName)) - } - if vs.Spec.PGVector.EmbeddingTableName != "" { - ops = append(ops, pgvector.WithEmbeddingTableName(vs.Spec.PGVector.EmbeddingTableName)) - } - if ref := vs.Spec.PGVector.DataSourceRef; ref != nil { - if err := utils.ValidateClient(c, dc); err != nil { - return nil, nil, err - } - ds := &arcadiav1alpha1.Datasource{} - if c != nil { - if err := c.Get(ctx, types.NamespacedName{Name: ref.Name, Namespace: ref.GetNamespace(vs.GetNamespace())}, ds); err != nil { - return nil, nil, err - } - } else { - obj, err := dc.Resource(schema.GroupVersionResource{Group: "arcadia.kubeagi.k8s.com.cn", Version: "v1alpha1", Resource: "datasources"}). 
- Namespace(ref.GetNamespace(vs.GetNamespace())).Get(ctx, ref.Name, metav1.GetOptions{}) - if err != nil { - return nil, nil, err - } - err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), ds) - if err != nil { - return nil, nil, err - } - } - vs.Spec.Endpoint = ds.Spec.Endpoint.DeepCopy() - pool, err := datasource.GetPostgreSQLPool(ctx, c, dc, ds) - if err != nil { - return nil, nil, err - } - conn, err := pool.Acquire(ctx) - if err != nil { - return nil, nil, err - } - klog.V(5).Info("acquire pg conn from pool") - finish = func() { - if conn != nil { - conn.Release() - klog.V(5).Info("release pg conn to pool") - } - } - ops = append(ops, pgvector.WithConn(conn.Conn())) - } else { - ops = append(ops, pgvector.WithConnectionURL(vs.Spec.Endpoint.URL)) - } - if embedder != nil { - ops = append(ops, pgvector.WithEmbedder(embedder)) - } else { - llm, _ := openai.New() - embedder, _ = embeddings.NewEmbedder(llm) - } - ops = append(ops, pgvector.WithEmbedder(embedder)) - if collectionName != "" { - ops = append(ops, pgvector.WithCollectionName(collectionName)) - } else { - ops = append(ops, pgvector.WithCollectionName(vs.Spec.PGVector.CollectionName)) - } - v, err = pgvector.New(ctx, ops...) 
+ v, finish, err = NewPGVectorStore(ctx, vs, c, dc, embedder, collectionName) case arcadiav1alpha1.VectorStoreTypeUnknown: fallthrough default: @@ -130,7 +61,7 @@ func NewVectorStore(ctx context.Context, vs *arcadiav1alpha1.VectorStore, embedd return v, finish, err } -func RemoveCollection(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1.VectorStore, collectionName string) (err error) { +func RemoveCollection(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1.VectorStore, collectionName string, c client.Client, dc dynamic.Interface) (err error) { switch vs.Spec.Type() { case arcadiav1alpha1.VectorStoreTypeChroma: ops := []chroma.Option{ @@ -151,19 +82,14 @@ func RemoveCollection(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1. return err } case arcadiav1alpha1.VectorStoreTypePGVector: - ops := []pgvector.Option{ - pgvector.WithConnectionURL(vs.Spec.Endpoint.URL), - pgvector.WithPreDeleteCollection(vs.Spec.PGVector.PreDeleteCollection), - pgvector.WithCollectionTableName(vs.Spec.PGVector.CollectionTableName), - } - if collectionName != "" { - ops = append(ops, pgvector.WithCollectionName(collectionName)) - } else { - ops = append(ops, pgvector.WithCollectionName(vs.Spec.PGVector.CollectionName)) - } - v, err := pgvector.New(ctx, ops...) + v, finish, err := NewPGVectorStore(ctx, vs, c, dc, nil, collectionName) + defer func() { + if finish != nil { + finish() + } + }() if err != nil { - log.Error(err, "reconcile delete: init vector store error, may leave garbage data") + log.Error(err, "reconcile delete: init pgvector error, may leave garbage data") return err } if err = v.RemoveCollection(ctx); err != nil { @@ -178,3 +104,26 @@ func RemoveCollection(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1. 
} return err } + +func AddDocuments(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1.VectorStore, embedder embeddings.Embedder, collectionName string, c client.Client, dc dynamic.Interface, documents []lanchaingoschema.Document) (err error) { + s, finish, err := NewVectorStore(ctx, vs, embedder, collectionName, c, dc) + if err != nil { + return err + } + log.Info("handle file: add documents to embedder") + if store, ok := s.(*PGVectorStore); ok { + // now only pgvector support Row-level updates + log.Info("handle file: use pgvector, filter out exist documents") + if documents, err = store.RemoveExist(ctx, documents); err != nil { + return err + } + } + if _, err = s.AddDocuments(ctx, documents); err != nil { + return err + } + if finish != nil { + finish() + } + log.Info("handle file succeeded") + return nil +} diff --git a/tests/deploy-values.yaml b/tests/deploy-values.yaml index f591f58e1..8763506b8 100644 --- a/tests/deploy-values.yaml +++ b/tests/deploy-values.yaml @@ -1,3 +1,5 @@ +global: + defaultVectorStoreType: pgvector # @section controller is used as the core controller for arcadia # @param image Image to be used # @param imagePullPolcy ImagePullPolicy diff --git a/tests/example-test.sh b/tests/example-test.sh index 100af1d31..b8ef28d38 100755 --- a/tests/example-test.sh +++ b/tests/example-test.sh @@ -158,6 +158,9 @@ function waitCRDStatusReady() { message=$(kubectl -n${namespace} get ${source} ${name} --ignore-not-found=true -o json | jq -r '.status.conditions[0].message') if [[ $readStatus == "True" ]]; then info $message + if [[ ${source} == "KnowledgeBase" ]]; then + kubectl get knowledgebase -n $namespace $name -o json | jq -r '.status.fileGroupDetail[0].fileDetails' + fi break fi @@ -281,7 +284,8 @@ info "7.4.2 create knowledgebase based on pgvector and wait it ready" kubectl apply -f config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml waitCRDStatusReady "KnowledgeBase" "arcadia" "knowledgebase-sample-pgvector" -info "7.5 check 
chroma vectorstore has data" +info "7.5 check vectorstore has data" +info "7.5.1 check chroma vectorstore has data" kubectl port-forward -n arcadia svc/arcadia-chromadb 8000:8000 >/dev/null 2>&1 & chroma_pid=$! info "port-forward chroma in pid: $chroma_pid" @@ -295,6 +299,48 @@ else exit 1 fi +info "7.5.2 check pgvector vectorstore has data" +kubectl port-forward -n arcadia svc/arcadia-postgresql 5432:5432 >/dev/null 2>&1 & +postgres_pid=$! +info "port-forward postgres in pid: $postgres_pid" +sleep 3 +password=$(kubectl get secrets -n arcadia arcadia-postgresql -o json | jq -r '.data."postgres-password"' | base64 --decode) +if [[ $GITHUB_ACTIONS == "true" ]]; then + pgdata=$(docker run --net=host --entrypoint="" -e PGPASSWORD=$password kubeagi/postgresql:latest psql -U postgres -d arcadia -h localhost -c "select document from langchain_pg_embedding;") +else + pgdata=$(docker run --net=host --entrypoint="" -e PGPASSWORD=$password kubeagi/postgresql:latest psql -U postgres -d arcadia -h host.docker.internal -c "select document from langchain_pg_embedding;") +fi +if [[ -z $pgdata ]]; then + info "get no data in postgres" + exit 1 +else + info $pgdata +fi + +info "7.6 update qa.csv to make sure it can be embedded" +echo "newquestion,newanswer" >>pkg/documentloaders/testdata/qa.csv +mc cp pkg/documentloaders/testdata/qa.csv arcadiatest/${bucket}/dataset/dataset-playground/v1/qa.csv +mc tag set arcadiatest/${bucket}/dataset/dataset-playground/v1/qa.csv "object_type=QA" +sleep 3 +kubectl annotate knowledgebase/knowledgebase-sample-pgvector -n arcadia "arcadia.kubeagi.k8s.com.cn/update-source-file-time=$(date)" +sleep 3 +waitCRDStatusReady "KnowledgeBase" "arcadia" "knowledgebase-sample-pgvector" +if [[ $GITHUB_ACTIONS == "true" ]]; then + pgdata=$(docker run --net=host --entrypoint="" -e PGPASSWORD=$password kubeagi/postgresql:latest psql -U postgres -d arcadia -h localhost -c "select document from langchain_pg_embedding;") +else + pgdata=$(docker run --net=host 
--entrypoint="" -e PGPASSWORD=$password kubeagi/postgresql:latest psql -U postgres -d arcadia -h host.docker.internal -c "select document from langchain_pg_embedding;") +fi +if [[ -z $pgdata ]]; then + info "get no data in postgres" + exit 1 +else + if [[ ! $pgdata =~ "newquestion" ]]; then + info "get no new data in postgres" + exit 1 + fi + info $pgdata +fi + info "8 validate simple app can work normally" info "Prepare dependent LLM service" kubectl apply -f config/samples/app_shared_llm_service.yaml