diff --git a/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml b/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml index 806b55639..f60224138 100644 --- a/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml +++ b/config/samples/arcadia_v1alpha1_worker_baichuan2-7b.yaml @@ -25,7 +25,7 @@ spec: image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z imagePullPolicy: IfNotPresent runner: - image: kubeagi/arcadia-fastchat-worker:v0.2.0 + image: kubeagi/arcadia-fastchat-worker:v0.2.36 imagePullPolicy: IfNotPresent resources: limits: diff --git a/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml b/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml index f3ed77e3d..f1ddd8de3 100644 --- a/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml +++ b/config/samples/arcadia_v1alpha1_worker_bge-large-zh-v1.5.yaml @@ -12,7 +12,7 @@ spec: image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z imagePullPolicy: IfNotPresent runner: - image: kubeagi/arcadia-fastchat-worker:v0.2.0 + image: kubeagi/arcadia-fastchat-worker:v0.2.36 imagePullPolicy: IfNotPresent model: kind: "Models" diff --git a/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml b/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml index 61dbfba44..f3c99ed12 100644 --- a/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml +++ b/config/samples/arcadia_v1alpha1_worker_qwen-7b-chat.yaml @@ -15,7 +15,7 @@ spec: image: kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z imagePullPolicy: IfNotPresent runner: - image: kubeagi/arcadia-fastchat-worker:v0.2.0 + image: kubeagi/arcadia-fastchat-worker:v0.2.36 imagePullPolicy: IfNotPresent resources: limits: diff --git a/deploy/charts/arcadia/Chart.yaml b/deploy/charts/arcadia/Chart.yaml index 8ed823f39..110168873 100644 --- a/deploy/charts/arcadia/Chart.yaml +++ b/deploy/charts/arcadia/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: arcadia description: A Helm chart(Also a KubeBB Component) for KubeAGI Arcadia type: application -version: 0.3.19 +version: 0.3.20 appVersion: "0.2.0" keywords: diff --git a/deploy/charts/arcadia/README.md b/deploy/charts/arcadia/README.md index af4ee551b..efdc09492 100644 --- a/deploy/charts/arcadia/README.md +++ b/deploy/charts/arcadia/README.md @@ -17,7 +17,6 @@ global settings of arcadia chart. | Parameter | Description | Default | | ------------------------ | ------------------------------------------------------------ | ----------- | -| `oss.bucket` | Name of the bucket where data is stored | `"arcadia"` | | `defaultVectorStoreType` | Defines the default vector database type, currently `chroma` and `pgvector` are available | `pgvector` | ### controller diff --git a/deploy/charts/arcadia/templates/config.yaml b/deploy/charts/arcadia/templates/config.yaml index 0ec121b5a..d8f83c9e6 100644 --- a/deploy/charts/arcadia/templates/config.yaml +++ b/deploy/charts/arcadia/templates/config.yaml @@ -38,14 +38,20 @@ data: name: '{{ .Release.Name }}-pgvector-vectorstore' {{- end }} namespace: '{{ .Release.Namespace }}' -{{- if .Values.rerank.enabled }} +{{- if .Values.config.embedder.enabled }} + embedder: + apiGroup: arcadia.kubeagi.k8s.com.cn/v1alpha1 + kind: Embedder + name: {{ .Release.Name }}-embedder + namespace: {{ .Release.Namespace }} +{{- end }} +{{- if .Values.config.rerank.enabled }} rerank: apiGroup: arcadia.kubeagi.k8s.com.cn/v1alpha1 kind: Model - name: bge-reranker-large + name: {{ .Values.config.rerank.model }} namespace: {{ .Release.Namespace }} {{- end }} - #streamlit: # image: 172.22.96.34/cluster_system/streamlit:v1.29.0 # ingressClassName: portal-ingress diff --git a/deploy/charts/arcadia/templates/post-embedder.yaml b/deploy/charts/arcadia/templates/post-embedder.yaml new file mode 100644 index 000000000..b160e7abf --- /dev/null +++ b/deploy/charts/arcadia/templates/post-embedder.yaml @@ -0,0 +1,19 @@ +{{- if .Values.config.embedder.enabled }} +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: Worker +metadata: + name: {{ .Release.Name }}-embedder + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "2" +spec: + displayName: SystemEmbedder + description: "这是系统默认使用的Embedding模型服务" + type: "fastchat" + replicas: 1 + model: + kind: "Models" + name: {{ .Values.config.embedder.model }} + namespace: {{ .Release.Namespace }} +{{- end }} \ No newline at end of file diff --git a/deploy/charts/arcadia/values.yaml b/deploy/charts/arcadia/values.yaml index 5de0a17c0..74e96983d 100644 --- a/deploy/charts/arcadia/values.yaml +++ b/deploy/charts/arcadia/values.yaml @@ -20,6 +20,18 @@ global: - postgresql..nip.io ip: +# @section config is used to configure the system +config: + # embedder is used as the system default embedding service + embedder: + enabled: true + model: "bge-large-zh-v1.5" + # rerank is the default model for reranking service + rerank: + enabled: true + model: "bge-reranker-large" + + # @section controller is used as the core controller for arcadia # @param image Image to be used # @param imagePullPolcy ImagePullPolicy diff --git a/pkg/appruntime/chain/llmchain.go b/pkg/appruntime/chain/llmchain.go index 0a22b17c2..420a4413c 100644 --- a/pkg/appruntime/chain/llmchain.go +++ b/pkg/appruntime/chain/llmchain.go @@ -76,12 +76,6 @@ func (l *LLMChain) Run(ctx context.Context, _ client.Client, args map[string]any instance := l.Instance options := GetChainOptions(instance.Spec.CommonChainConfig) - needStream := false - needStream, ok = args[base.InputIsNeedStreamKeyInArg].(bool) - if ok && needStream { - options = append(options, chains.WithStreamingFunc(stream(args))) - } - // Check if have files as input v3, ok := args["documents"] if ok { @@ -123,12 +117,17 @@ func (l *LLMChain) Run(ctx context.Context, _ client.Client, args map[string]any l.LLMChain = *chain var out string - - // Predict based on options - if len(options) > 0 { + needStream := false + needStream, ok = args[base.InputIsNeedStreamKeyInArg].(bool) + if ok && needStream { + options = append(options, chains.WithStreamingFunc(stream(args))) out, err = chains.Predict(ctx, l.LLMChain, args, options...) } else { - out, err = chains.Predict(ctx, l.LLMChain, args) + if len(options) > 0 { + out, err = chains.Predict(ctx, l.LLMChain, args, options...) + } else { + out, err = chains.Predict(ctx, l.LLMChain, args) + } } out, err = handleNoErrNoOut(ctx, needStream, out, err, l.LLMChain, args, options) diff --git a/pkg/appruntime/chain/retrievalqachain.go b/pkg/appruntime/chain/retrievalqachain.go index 48ad5780c..292e76d4a 100644 --- a/pkg/appruntime/chain/retrievalqachain.go +++ b/pkg/appruntime/chain/retrievalqachain.go @@ -104,11 +104,6 @@ func (l *RetrievalQAChain) Run(ctx context.Context, _ client.Client, args map[st instance := l.Instance options := GetChainOptions(instance.Spec.CommonChainConfig) - needStream := false - needStream, ok = args[base.InputIsNeedStreamKeyInArg].(bool) - if ok && needStream { - options = append(options, chains.WithStreamingFunc(stream(args))) - } // Check if have files as input v5, ok := args["documents"] @@ -137,12 +132,17 @@ func (l *RetrievalQAChain) Run(ctx context.Context, _ client.Client, args map[st l.ConversationalRetrievalQA = chain args["query"] = args["question"] var out string - - // Predict based on options - if len(options) > 0 { + needStream := false + needStream, ok = args[base.InputIsNeedStreamKeyInArg].(bool) + if ok && needStream { + options = append(options, chains.WithStreamingFunc(stream(args))) out, err = chains.Predict(ctx, l.ConversationalRetrievalQA, args, options...) } else { - out, err = chains.Predict(ctx, l.ConversationalRetrievalQA, args) + if len(options) > 0 { + out, err = chains.Predict(ctx, l.ConversationalRetrievalQA, args, options...) + } else { + out, err = chains.Predict(ctx, l.ConversationalRetrievalQA, args) + } } out, err = handleNoErrNoOut(ctx, needStream, out, err, l.ConversationalRetrievalQA, args, options)