diff --git a/PROJECT b/PROJECT
index c5c2138b6..0f57bac66 100644
--- a/PROJECT
+++ b/PROJECT
@@ -90,4 +90,22 @@ resources:
   kind: Model
   path: github.com/kubeagi/arcadia/api/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: kubeagi.k8s.com.cn
+  group: arcadia
+  kind: KnowledgeBase
+  path: github.com/kubeagi/arcadia/api/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: kubeagi.k8s.com.cn
+  group: arcadia
+  kind: VectorStore
+  path: github.com/kubeagi/arcadia/api/v1alpha1
+  version: v1alpha1
 version: "3"
diff --git a/api/v1alpha1/common.go b/api/v1alpha1/common.go
index 070672856..9e2cfa650 100644
--- a/api/v1alpha1/common.go
+++ b/api/v1alpha1/common.go
@@ -16,6 +16,10 @@ limitations under the License.
 
 package v1alpha1
 
+import (
+	"github.com/kubeagi/arcadia/pkg/utils"
+)
+
 const (
 	// Finalizer is the key of the finalizer
 	Finalizer = Group + "/finalizer"
@@ -67,6 +71,13 @@ func (in *TypedObjectReference) WithNameSpace(namespace string) {
 	in.Namespace = &namespace
 }
 
+func (in *TypedObjectReference) GetNamespace() string {
+	if in.Namespace == nil {
+		return utils.GetSelfNamespace()
+	}
+	return *in.Namespace
+}
+
 // Endpoint represents a reachable API endpoint.
 type Endpoint struct {
 	// URL chart repository address
@@ -80,3 +91,14 @@ type Endpoint struct {
 	// Insecure if the endpoint needs a secure connection
 	Insecure bool `json:"insecure,omitempty"`
 }
+
+type CommonSpec struct {
+	// Creator defines datasource creator (AUTO-FILLED by webhook)
+	Creator string `json:"creator,omitempty"`
+
+	// DisplayName defines datasource display name
+	DisplayName string `json:"displayName,omitempty"`
+
+	// Description defines datasource description
+	Description string `json:"description,omitempty"`
+}
diff --git a/api/v1alpha1/condition.go b/api/v1alpha1/condition.go
index a4665ff4f..971a3c38c 100644
--- a/api/v1alpha1/condition.go
+++ b/api/v1alpha1/condition.go
@@ -207,3 +207,7 @@ func (s *ConditionedStatus) Equal(other *ConditionedStatus) bool {
 
 	return true
 }
+
+func (s *ConditionedStatus) IsReady() bool {
+	return s.GetCondition(TypeReady).Status == corev1.ConditionTrue
+}
diff --git a/api/v1alpha1/dataset_types.go b/api/v1alpha1/dataset_types.go
index 674130dbb..19626e38f 100644
--- a/api/v1alpha1/dataset_types.go
+++ b/api/v1alpha1/dataset_types.go
@@ -25,11 +25,7 @@ import (
 
 // DatasetSpec defines the desired state of Dataset
 type DatasetSpec struct {
-	// Creator defines dataset creator(AUTO-FILLED by webhook)
-	Creator string `json:"creator,omitempty"`
-
-	// DisplayName defines dataset display name
-	DiplayName string `json:"displayName"`
+	CommonSpec `json:",inline"`
 
 	// ContentType defines dataset
 	ContentType string `json:"contentType"`
diff --git a/api/v1alpha1/datasource_types.go b/api/v1alpha1/datasource_types.go
index 9d29c2b2e..6e3472bda 100644
--- a/api/v1alpha1/datasource_types.go
+++ b/api/v1alpha1/datasource_types.go
@@ -25,14 +25,7 @@ import (
 
 // DatasourceSpec defines the desired state of Datasource
 type DatasourceSpec struct {
-	// Creator defines datasource creator(AUTO-FILLED by webhook)
-	Creator string `json:"creator,omitempty"`
-
-	// DisplayName defines datasource display name
-	DiplayName string `json:"displayName"`
-
-	// Description defines datasource description
-	Description string `json:"description,omitempty"`
+	CommonSpec `json:",inline"`
 
 	// Enpoint defines connection info
 	Enpoint *Endpoint `json:"endpoint,omitempty"`
diff --git a/api/v1alpha1/embedder_types.go b/api/v1alpha1/embedder_types.go
index d2ebd4e1d..5d2c55dbc 100644
--- a/api/v1alpha1/embedder_types.go
+++ b/api/v1alpha1/embedder_types.go
@@ -27,11 +27,7 @@ import (
 
 // EmbedderSpec defines the desired state of Embedder
 type EmbedderSpec struct {
-	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
-	// Important: Run "make" to regenerate code after modifying this file
-
-	// Name of the Embedding service
-	DisplayName string `json:"displayName,omitempty"`
+	CommonSpec `json:",inline"`
 
 	// ServiceType indicates the source type of embedding service
 	ServiceType embeddings.EmbeddingType `json:"serviceType,omitempty"`
@@ -50,6 +46,7 @@ type EmbedderStatus struct {
 
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
+//+kubebuilder:printcolumn:name="display-name",type=string,JSONPath=`.spec.displayName`
 
 // Embedder is the Schema for the embeddings API
 type Embedder struct {
diff --git a/api/v1alpha1/knowledgebase.go b/api/v1alpha1/knowledgebase.go
new file mode 100644
index 000000000..08cc84e1e
--- /dev/null
+++ b/api/v1alpha1/knowledgebase.go
@@ -0,0 +1,73 @@
+package v1alpha1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+func (kb *KnowledgeBase) VectorStoreCollectionName() string {
+	return kb.Namespace + "_" + kb.Name
+}
+
+func (kb *KnowledgeBase) InitCondition() Condition {
+	return Condition{
+		Type:               TypeReady,
+		Status:             corev1.ConditionUnknown,
+		Reason:             "Init",
+		Message:            "Reconciliation in progress",
+		LastTransitionTime: metav1.Now(),
+		LastSuccessfulTime: metav1.Now(),
+	}
+}
+
+func (kb *KnowledgeBase) PendingCondition(msg string) Condition {
+	return Condition{
+		Type:               TypeReady,
+		Status:             corev1.ConditionFalse,
+		Reason:             "Pending",
+		Message:            msg,
+		LastTransitionTime: metav1.Now(),
+		LastSuccessfulTime: metav1.Now(),
+	}
+}
+
+func (kb *KnowledgeBase) ErrorCondition(msg string) Condition {
+	return Condition{
+		Type:               TypeReady,
+		Status:             corev1.ConditionFalse,
+		Reason:             "Error",
+		Message:            msg,
+		LastTransitionTime: metav1.Now(),
+		LastSuccessfulTime: metav1.Now(),
+	}
+}
+
+func (kb *KnowledgeBase) ReadyCondition() Condition {
+	return Condition{
+		Type:               TypeReady,
+		Status:             corev1.ConditionTrue,
+		LastTransitionTime: metav1.Now(),
+		LastSuccessfulTime: metav1.Now(),
+		Message:            "Success",
+	}
+}
+
+func (f *FileDetails) UpdateErr(err error) {
+	f.LastUpdateTime = metav1.Now()
+	if err != nil {
+		f.ErrMessage = err.Error()
+		f.Phase = FileProcessPhaseFailed
+	} else if f.Phase != FileProcessPhaseSucceeded {
+		f.Phase = FileProcessPhaseSucceeded
+	}
+}
+
+func (f *FileGroupDetail) Init(group FileGroup) {
+	f.Datasource = group.Datasource.DeepCopy()
+	f.FileDetails = make([]FileDetails, len(group.Paths))
+	for i := range group.Paths {
+		f.FileDetails[i].Path = group.Paths[i]
+		f.FileDetails[i].Phase = FileProcessPhasePending
+		f.FileDetails[i].LastUpdateTime = metav1.Now()
+	}
+}
diff --git a/api/v1alpha1/knowledgebase_types.go b/api/v1alpha1/knowledgebase_types.go
new file mode 100644
index 000000000..8a1a871ff
--- /dev/null
+++ b/api/v1alpha1/knowledgebase_types.go
@@ -0,0 +1,107 @@
+/*
+Copyright 2023 KubeAGI.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// KnowledgeBaseSpec defines the desired state of KnowledgeBase +type KnowledgeBaseSpec struct { + CommonSpec `json:",inline"` + + // Embedder defines the embedder to embedding files + Embedder *TypedObjectReference `json:"embedder,omitempty"` + + // VectorStore defines the vectorstore to store results + VectorStore *TypedObjectReference `json:"vectorStore,omitempty"` + + // FileGroups included files Grouped by Datasource + FileGroups []FileGroup `json:"fileGroups,omitempty"` +} +type FileGroupDetail struct { + // From defines the datasource which provides these files + Datasource *TypedObjectReference `json:"datasource,omitempty"` + + // FileDetails is the detail files + FileDetails []FileDetails `json:"fileDetails,omitempty"` +} + +type FileDetails struct { + // Path defines the detail path to get objects from above datasource + Path string `json:"path,omitempty"` + + // Checksum defines the checksum of the file + Checksum string `json:"checksum,omitempty"` + + // The last time this condition was updated. + LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` + + // Phase defines the process phase + Phase FileProcessPhase `json:"phase,omitempty"` + + // ErrMessage defines the error message + ErrMessage string `json:"errMessage,omitempty"` +} + +type FileProcessPhase string + +const ( + FileProcessPhasePending FileProcessPhase = "Pending" + FileProcessPhaseProcessing FileProcessPhase = "Processing" + FileProcessPhaseSucceeded FileProcessPhase = "Succeeded" + FileProcessPhaseFailed FileProcessPhase = "Failed" +) + +// KnowledgeBaseStatus defines the observed state of KnowledgeBase +type KnowledgeBaseStatus struct { + // ObservedGeneration is the last observed generation. 
+ // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // FileGroupDetail is the detail of these files + FileGroupDetail []FileGroupDetail `json:"fileGroupDetail,omitempty"` + + // ConditionedStatus is the current status + ConditionedStatus `json:",inline"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="display-name",type=string,JSONPath=`.spec.displayName` + +// KnowledgeBase is the Schema for the knowledgebases API +type KnowledgeBase struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec KnowledgeBaseSpec `json:"spec,omitempty"` + Status KnowledgeBaseStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// KnowledgeBaseList contains a list of KnowledgeBase +type KnowledgeBaseList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []KnowledgeBase `json:"items"` +} + +func init() { + SchemeBuilder.Register(&KnowledgeBase{}, &KnowledgeBaseList{}) +} diff --git a/api/v1alpha1/llm.go b/api/v1alpha1/llm.go index 5a8836d1f..acd1d4bc0 100644 --- a/api/v1alpha1/llm.go +++ b/api/v1alpha1/llm.go @@ -40,7 +40,7 @@ func (llmStatus LLMStatus) LLMReady() (string, bool) { if len(llmStatus.Conditions) == 0 { return "No conditions yet", false } - if llmStatus.Conditions[0].Type != TypeReady || llmStatus.Conditions[0].Status != corev1.ConditionTrue { + if !llmStatus.IsReady() { return "Bad condition", false } return "", true diff --git a/api/v1alpha1/llm_types.go b/api/v1alpha1/llm_types.go index 47e5286d8..d77e635ad 100644 --- a/api/v1alpha1/llm_types.go +++ b/api/v1alpha1/llm_types.go @@ -24,7 +24,8 @@ import ( // LLMSpec defines the desired state of LLM type LLMSpec struct { - DisplayName string `json:"displayName,omitempty"` + CommonSpec `json:",inline"` + // Type defines the type of llm Type llms.LLMType `json:"type"` diff --git a/api/v1alpha1/vectorstore.go b/api/v1alpha1/vectorstore.go new file mode 100644 index 000000000..01c82868e --- /dev/null +++ b/api/v1alpha1/vectorstore.go @@ -0,0 +1,88 @@ +/* +Copyright 2023 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + LabelVectorStoreType = Group + "/vectorstore-type" +) + +type VectorStoreType string + +const ( + VectorStoreTypeChroma VectorStoreType = "chroma" + VectorStoreTypeUnknown VectorStoreType = "unknown" +) + +func (vs VectorStoreSpec) Type() VectorStoreType { + if vs.Enpoint == nil { + return VectorStoreTypeUnknown + } + + if vs.Chroma != nil { + return VectorStoreTypeChroma + } + + return VectorStoreTypeUnknown +} + +func (vs *VectorStore) InitCondition() Condition { + return Condition{ + Type: TypeReady, + Status: corev1.ConditionUnknown, + LastTransitionTime: metav1.Now(), + LastSuccessfulTime: metav1.Now(), + Reason: "Init", + Message: "Reconciliation in progress", + } +} + +func (vs *VectorStore) PendingCondition(msg string) Condition { + return Condition{ + Type: TypeReady, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Now(), + LastSuccessfulTime: metav1.Now(), + Reason: "Pending", + Message: msg, + } +} + +func (vs *VectorStore) ErrorCondition(msg string) Condition { + return Condition{ + Type: TypeReady, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Now(), + LastSuccessfulTime: metav1.Now(), + Reason: "Error", + Message: msg, + } +} + +func (vs *VectorStore) ReadyCondition() Condition { + return Condition{ + Type: TypeReady, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Now(), + LastSuccessfulTime: metav1.Now(), + Message: "Success", + } +} diff --git a/api/v1alpha1/vectorstore_types.go b/api/v1alpha1/vectorstore_types.go new file mode 100644 index 000000000..1449c1296 --- /dev/null +++ b/api/v1alpha1/vectorstore_types.go @@ -0,0 +1,69 @@ +/* +Copyright 2023 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + chromago "github.com/amikos-tech/chroma-go" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// VectorStoreSpec defines the desired state of VectorStore +type VectorStoreSpec struct { + CommonSpec `json:",inline"` + + // Enpoint defines connection info + Enpoint *Endpoint `json:"endpoint,omitempty"` + + Chroma *Chroma `json:"chroma,omitempty"` +} + +// Chroma defines the configuration of Chroma +type Chroma struct { + DistanceFunction chromago.DistanceFunction `json:"distanceFunction,omitempty"` +} + +// VectorStoreStatus defines the observed state of VectorStore +type VectorStoreStatus struct { + // ConditionedStatus is the current status + ConditionedStatus `json:",inline"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="display-name",type=string,JSONPath=`.spec.displayName` + +// VectorStore is the Schema for the vectorstores API +type VectorStore struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec VectorStoreSpec `json:"spec,omitempty"` + Status VectorStoreStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// VectorStoreList contains a list of VectorStore +type VectorStoreList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []VectorStore `json:"items"` +} + +func init() { + SchemeBuilder.Register(&VectorStore{}, &VectorStoreList{}) +} diff --git a/api/v1alpha1/worker_types.go b/api/v1alpha1/worker_types.go index 9eb42d7c2..7e792cac4 100644 --- a/api/v1alpha1/worker_types.go +++ b/api/v1alpha1/worker_types.go @@ -25,11 +25,7 @@ import ( // WorkerSpec defines the desired state of Worker type WorkerSpec struct { - // Creator defines dataset creator(AUTO-FILLED by webhook) - Creator string `json:"creator,omitempty"` - - // DisplayName defines dataset display name - DiplayName string `json:"displayName"` + CommonSpec `json:",inline"` } // WorkerStatus defines the observed state of Worker diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 1f5343a92..51560adb9 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -26,6 +26,36 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Chroma) DeepCopyInto(out *Chroma) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Chroma. +func (in *Chroma) DeepCopy() *Chroma { + if in == nil { + return nil + } + out := new(Chroma) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CommonSpec) DeepCopyInto(out *CommonSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommonSpec. +func (in *CommonSpec) DeepCopy() *CommonSpec { + if in == nil { + return nil + } + out := new(CommonSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Condition) DeepCopyInto(out *Condition) { *out = *in @@ -127,6 +157,7 @@ func (in *DatasetList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DatasetSpec) DeepCopyInto(out *DatasetSpec) { *out = *in + out.CommonSpec = in.CommonSpec } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DatasetSpec. @@ -217,6 +248,7 @@ func (in *DatasourceList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DatasourceSpec) DeepCopyInto(out *DatasourceSpec) { *out = *in + out.CommonSpec = in.CommonSpec if in.Enpoint != nil { in, out := &in.Enpoint, &out.Enpoint *out = new(Endpoint) @@ -317,6 +349,7 @@ func (in *EmbedderList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EmbedderSpec) DeepCopyInto(out *EmbedderSpec) { *out = *in + out.CommonSpec = in.CommonSpec if in.Enpoint != nil { in, out := &in.Enpoint, &out.Enpoint *out = new(Endpoint) @@ -370,6 +403,22 @@ func (in *Endpoint) DeepCopy() *Endpoint { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FileDetails) DeepCopyInto(out *FileDetails) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FileDetails. +func (in *FileDetails) DeepCopy() *FileDetails { + if in == nil { + return nil + } + out := new(FileDetails) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *FileGroup) DeepCopyInto(out *FileGroup) { *out = *in @@ -395,6 +444,148 @@ func (in *FileGroup) DeepCopy() *FileGroup { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FileGroupDetail) DeepCopyInto(out *FileGroupDetail) { + *out = *in + if in.Datasource != nil { + in, out := &in.Datasource, &out.Datasource + *out = new(TypedObjectReference) + (*in).DeepCopyInto(*out) + } + if in.FileDetails != nil { + in, out := &in.FileDetails, &out.FileDetails + *out = make([]FileDetails, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FileGroupDetail. +func (in *FileGroupDetail) DeepCopy() *FileGroupDetail { + if in == nil { + return nil + } + out := new(FileGroupDetail) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeBase) DeepCopyInto(out *KnowledgeBase) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeBase. +func (in *KnowledgeBase) DeepCopy() *KnowledgeBase { + if in == nil { + return nil + } + out := new(KnowledgeBase) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KnowledgeBase) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KnowledgeBaseList) DeepCopyInto(out *KnowledgeBaseList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]KnowledgeBase, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeBaseList. +func (in *KnowledgeBaseList) DeepCopy() *KnowledgeBaseList { + if in == nil { + return nil + } + out := new(KnowledgeBaseList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KnowledgeBaseList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeBaseSpec) DeepCopyInto(out *KnowledgeBaseSpec) { + *out = *in + out.CommonSpec = in.CommonSpec + if in.Embedder != nil { + in, out := &in.Embedder, &out.Embedder + *out = new(TypedObjectReference) + (*in).DeepCopyInto(*out) + } + if in.VectorStore != nil { + in, out := &in.VectorStore, &out.VectorStore + *out = new(TypedObjectReference) + (*in).DeepCopyInto(*out) + } + if in.FileGroups != nil { + in, out := &in.FileGroups, &out.FileGroups + *out = make([]FileGroup, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeBaseSpec. +func (in *KnowledgeBaseSpec) DeepCopy() *KnowledgeBaseSpec { + if in == nil { + return nil + } + out := new(KnowledgeBaseSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeBaseStatus) DeepCopyInto(out *KnowledgeBaseStatus) { + *out = *in + if in.FileGroupDetail != nil { + in, out := &in.FileGroupDetail, &out.FileGroupDetail + *out = make([]FileGroupDetail, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.ConditionedStatus.DeepCopyInto(&out.ConditionedStatus) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeBaseStatus. +func (in *KnowledgeBaseStatus) DeepCopy() *KnowledgeBaseStatus { + if in == nil { + return nil + } + out := new(KnowledgeBaseStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LLM) DeepCopyInto(out *LLM) { *out = *in @@ -457,6 +648,7 @@ func (in *LLMList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LLMSpec) DeepCopyInto(out *LLMSpec) { *out = *in + out.CommonSpec = in.CommonSpec if in.Enpoint != nil { in, out := &in.Enpoint, &out.Enpoint *out = new(Endpoint) @@ -809,6 +1001,107 @@ func (in *TypedObjectReference) DeepCopy() *TypedObjectReference { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *VectorStore) DeepCopyInto(out *VectorStore) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VectorStore. +func (in *VectorStore) DeepCopy() *VectorStore { + if in == nil { + return nil + } + out := new(VectorStore) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VectorStore) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VectorStoreList) DeepCopyInto(out *VectorStoreList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]VectorStore, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VectorStoreList. +func (in *VectorStoreList) DeepCopy() *VectorStoreList { + if in == nil { + return nil + } + out := new(VectorStoreList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VectorStoreList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VectorStoreSpec) DeepCopyInto(out *VectorStoreSpec) { + *out = *in + out.CommonSpec = in.CommonSpec + if in.Enpoint != nil { + in, out := &in.Enpoint, &out.Enpoint + *out = new(Endpoint) + (*in).DeepCopyInto(*out) + } + if in.Chroma != nil { + in, out := &in.Chroma, &out.Chroma + *out = new(Chroma) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VectorStoreSpec. +func (in *VectorStoreSpec) DeepCopy() *VectorStoreSpec { + if in == nil { + return nil + } + out := new(VectorStoreSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VectorStoreStatus) DeepCopyInto(out *VectorStoreStatus) { + *out = *in + in.ConditionedStatus.DeepCopyInto(&out.ConditionedStatus) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VectorStoreStatus. +func (in *VectorStoreStatus) DeepCopy() *VectorStoreStatus { + if in == nil { + return nil + } + out := new(VectorStoreStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VersionedDataset) DeepCopyInto(out *VersionedDataset) { *out = *in @@ -973,6 +1266,7 @@ func (in *WorkerList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkerSpec) DeepCopyInto(out *WorkerSpec) { *out = *in + out.CommonSpec = in.CommonSpec } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerSpec. 
diff --git a/charts/arcadia/Chart.yaml b/charts/arcadia/Chart.yaml index 16c056a31..13bb3f1db 100644 --- a/charts/arcadia/Chart.yaml +++ b/charts/arcadia/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: arcadia description: A Helm chart(KubeBB Component) for KubeAGI Arcadia type: application -version: 0.1.17 +version: 0.1.18 appVersion: "0.0.1" keywords: diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasets.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasets.yaml index 912e593aa..6c08fb927 100644 --- a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasets.yaml +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasets.yaml @@ -49,14 +49,16 @@ spec: description: ContentType defines dataset type: string creator: - description: Creator defines dataset creator(AUTO-FILLED by webhook) + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description type: string displayName: - description: DisplayName defines dataset display name + description: DisplayName defines datasource display name type: string required: - contentType - - displayName type: object status: description: DatasetStatus defines the observed state of Dataset diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasources.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasources.yaml index 2883a043d..74ced126e 100644 --- a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasources.yaml +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_datasources.yaml @@ -43,7 +43,7 @@ spec: description: DatasourceSpec defines the desired state of Datasource properties: creator: - description: Creator defines datasource creator(AUTO-FILLED by webhook) + description: Creator defines datasource creator (AUTO-FILLED by webhook) type: string description: description: Description defines datasource description @@ -95,8 +95,6 @@ spec: description: Object must end with a slash "/" if it is a directory type: string type: object - required: - - displayName type: object status: description: DatasourceStatus defines the observed state of Datasource diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_embedders.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_embedders.yaml index 281c9d367..6ed4f41b7 100644 --- a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_embedders.yaml +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_embedders.yaml @@ -15,7 +15,11 @@ spec: singular: embedder scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .spec.displayName + name: display-name + type: string + name: v1alpha1 schema: openAPIV3Schema: description: Embedder is the Schema for the embeddings API @@ -35,8 +39,14 @@ spec: spec: description: EmbedderSpec defines the desired state of Embedder properties: + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string displayName: - description: Name of the Embedding service + description: DisplayName defines datasource display name type: string endpoint: description: Enpoint defines connection info diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml new file mode 100644 index 000000000..5961484b3 --- /dev/null +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml @@ -0,0 +1,238 @@ +--- 
+apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: knowledgebases.arcadia.kubeagi.k8s.com.cn +spec: + group: arcadia.kubeagi.k8s.com.cn + names: + kind: KnowledgeBase + listKind: KnowledgeBaseList + plural: knowledgebases + singular: knowledgebase + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.displayName + name: display-name + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: KnowledgeBase is the Schema for the knowledgebases API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: KnowledgeBaseSpec defines the desired state of KnowledgeBase + properties: + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string + displayName: + description: DisplayName defines datasource display name + type: string + embedder: + description: Embedder defines the embedder to embedding files + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in + the core API group. For any other third-party types, APIGroup + is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being referenced + type: string + required: + - kind + - name + type: object + fileGroups: + description: FileGroups included files Grouped by Datasource + items: + properties: + datasource: + description: From defines the datasource which provides this + `File` + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + paths: + description: Paths defines the detail paths to get objects from + above datasource + items: + type: string + type: array + required: + - datasource + - paths + type: object + type: array + vectorStore: + description: VectorStore defines the vectorstore to store results + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. 
+ If APIGroup is not specified, the specified Kind must be in + the core API group. For any other third-party types, APIGroup + is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being referenced + type: string + required: + - kind + - name + type: object + type: object + status: + description: KnowledgeBaseStatus defines the observed state of KnowledgeBase + properties: + conditions: + description: Conditions of the resource. + items: + description: A Condition that may apply to a resource. + properties: + lastSuccessfulTime: + description: LastSuccessfulTime is repository Last Successful + Update Time + format: date-time + type: string + lastTransitionTime: + description: LastTransitionTime is the last time this condition + transitioned from one status to another. + format: date-time + type: string + message: + description: A Message containing details about this condition's + last transition from one status to another, if any. + type: string + reason: + description: A Reason for this condition's last transition from + one status to another. + type: string + status: + description: Status of this condition; is it currently True, + False, or Unknown + type: string + type: + description: Type of this condition. At most one of each condition + type may apply to a resource at any point in time. + type: string + required: + - lastTransitionTime + - reason + - status + - type + type: object + type: array + fileGroupDetail: + description: FileGroupDetail is the detail of these files + items: + properties: + datasource: + description: From defines the datasource which provides these + files + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + fileDetails: + description: FileDetails is the detail files + items: + properties: + checksum: + description: Checksum defines the checksum of the file + type: string + errMessage: + description: ErrMessage defines the error message + type: string + lastUpdateTime: + description: The last time this condition was updated. + format: date-time + type: string + path: + description: Path defines the detail path to get objects + from above datasource + type: string + phase: + description: Phase defines the process phase + type: string + type: object + type: array + type: object + type: array + observedGeneration: + description: ObservedGeneration is the last observed generation. 
+ format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_llms.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_llms.yaml index ad0e74be9..b8d31a81e 100644 --- a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_llms.yaml +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_llms.yaml @@ -35,7 +35,14 @@ spec: spec: description: LLMSpec defines the desired state of LLM properties: + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string displayName: + description: DisplayName defines datasource display name type: string endpoint: description: Enpoint defines connection info diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml new file mode 100644 index 000000000..c9148f3db --- /dev/null +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml @@ -0,0 +1,139 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: vectorstores.arcadia.kubeagi.k8s.com.cn +spec: + group: arcadia.kubeagi.k8s.com.cn + names: + kind: VectorStore + listKind: VectorStoreList + plural: vectorstores + singular: vectorstore + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.displayName + name: display-name + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: VectorStore is the Schema for the vectorstores API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: VectorStoreSpec defines the desired state of VectorStore + properties: + chroma: + description: Chroma defines the configuration of Chroma + properties: + distanceFunction: + type: string + type: object + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string + displayName: + description: DisplayName defines datasource display name + type: string + endpoint: + description: Enpoint defines connection info + properties: + authSecret: + description: AuthSecret if the chart repository requires auth + authentication, set the username and password to secret, with + the field user and password respectively. + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + insecure: + description: Insecure if the endpoint needs a secure connection + type: boolean + url: + description: URL chart repository address + type: string + type: object + type: object + status: + description: VectorStoreStatus defines the observed state of VectorStore + properties: + conditions: + description: Conditions of the resource. + items: + description: A Condition that may apply to a resource. + properties: + lastSuccessfulTime: + description: LastSuccessfulTime is repository Last Successful + Update Time + format: date-time + type: string + lastTransitionTime: + description: LastTransitionTime is the last time this condition + transitioned from one status to another. + format: date-time + type: string + message: + description: A Message containing details about this condition's + last transition from one status to another, if any. + type: string + reason: + description: A Reason for this condition's last transition from + one status to another. + type: string + status: + description: Status of this condition; is it currently True, + False, or Unknown + type: string + type: + description: Type of this condition. At most one of each condition + type may apply to a resource at any point in time. + type: string + required: + - lastTransitionTime + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_workers.yaml b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_workers.yaml index fc1d1f4bf..b9f7dff5f 100644 --- a/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_workers.yaml +++ b/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_workers.yaml @@ -36,13 +36,14 @@ spec: description: WorkerSpec defines the desired state of Worker properties: creator: - description: Creator defines dataset creator(AUTO-FILLED by webhook) + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description type: string displayName: - description: DisplayName defines dataset display name + description: DisplayName defines datasource display name type: string - required: - - displayName type: object status: description: WorkerStatus defines the observed state of Worker diff --git a/charts/arcadia/templates/rbac.yaml b/charts/arcadia/templates/rbac.yaml index e15f708bf..7d9881c57 100644 --- a/charts/arcadia/templates/rbac.yaml +++ b/charts/arcadia/templates/rbac.yaml @@ -116,6 +116,32 @@ rules: - get - patch - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases/finalizers + verbs: + - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases/status + verbs: + - get + - patch + - update - apiGroups: - arcadia.kubeagi.k8s.com.cn resources: @@ -141,8 +167,7 @@ rules: verbs: - get - patch - - update -- apiGroups: + - update apiGroups: - arcadia.kubeagi.k8s.com.cn resources: - llms @@ -220,6 +245,32 @@ rules: - get - patch - update +- 
apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores/finalizers + verbs: + - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores/status + verbs: + - get + - patch + - update - apiGroups: - arcadia.kubeagi.k8s.com.cn resources: diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasets.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasets.yaml index 912e593aa..6c08fb927 100644 --- a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasets.yaml +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasets.yaml @@ -49,14 +49,16 @@ spec: description: ContentType defines dataset type: string creator: - description: Creator defines dataset creator(AUTO-FILLED by webhook) + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description type: string displayName: - description: DisplayName defines dataset display name + description: DisplayName defines datasource display name type: string required: - contentType - - displayName type: object status: description: DatasetStatus defines the observed state of Dataset diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasources.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasources.yaml index 2883a043d..74ced126e 100644 --- a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasources.yaml +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_datasources.yaml @@ -43,7 +43,7 @@ spec: description: DatasourceSpec defines the desired state of Datasource properties: creator: - description: Creator defines datasource creator(AUTO-FILLED by webhook) + description: Creator defines datasource creator (AUTO-FILLED by webhook) type: string description: description: Description defines datasource description @@ -95,8 +95,6 @@ spec: description: Object must end with a slash "/" if it is a directory type: string type: object - required: - - displayName type: object status: description: DatasourceStatus defines the observed state of Datasource diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_embedders.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_embedders.yaml index 281c9d367..6ed4f41b7 100644 --- a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_embedders.yaml +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_embedders.yaml @@ -15,7 +15,11 @@ spec: singular: embedder scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .spec.displayName + name: display-name + type: string + name: v1alpha1 schema: openAPIV3Schema: description: Embedder is the Schema for the embeddings API @@ -35,8 +39,14 @@ spec: spec: description: EmbedderSpec defines the desired state of Embedder properties: + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string displayName: - description: Name of the Embedding service + description: DisplayName defines datasource display name type: string endpoint: description: Enpoint defines connection info diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml new file mode 100644 index 000000000..5961484b3 --- /dev/null +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml 
@@ -0,0 +1,238 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: knowledgebases.arcadia.kubeagi.k8s.com.cn +spec: + group: arcadia.kubeagi.k8s.com.cn + names: + kind: KnowledgeBase + listKind: KnowledgeBaseList + plural: knowledgebases + singular: knowledgebase + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.displayName + name: display-name + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: KnowledgeBase is the Schema for the knowledgebases API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: KnowledgeBaseSpec defines the desired state of KnowledgeBase + properties: + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string + displayName: + description: DisplayName defines datasource display name + type: string + embedder: + description: Embedder defines the embedder to embedding files + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in + the core API group. For any other third-party types, APIGroup + is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being referenced + type: string + required: + - kind + - name + type: object + fileGroups: + description: FileGroups included files Grouped by Datasource + items: + properties: + datasource: + description: From defines the datasource which provides this + `File` + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + paths: + description: Paths defines the detail paths to get objects from + above datasource + items: + type: string + type: array + required: + - datasource + - paths + type: object + type: array + vectorStore: + description: VectorStore defines the vectorstore to store results + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. 
+ If APIGroup is not specified, the specified Kind must be in + the core API group. For any other third-party types, APIGroup + is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being referenced + type: string + required: + - kind + - name + type: object + type: object + status: + description: KnowledgeBaseStatus defines the observed state of KnowledgeBase + properties: + conditions: + description: Conditions of the resource. + items: + description: A Condition that may apply to a resource. + properties: + lastSuccessfulTime: + description: LastSuccessfulTime is repository Last Successful + Update Time + format: date-time + type: string + lastTransitionTime: + description: LastTransitionTime is the last time this condition + transitioned from one status to another. + format: date-time + type: string + message: + description: A Message containing details about this condition's + last transition from one status to another, if any. + type: string + reason: + description: A Reason for this condition's last transition from + one status to another. + type: string + status: + description: Status of this condition; is it currently True, + False, or Unknown + type: string + type: + description: Type of this condition. At most one of each condition + type may apply to a resource at any point in time. + type: string + required: + - lastTransitionTime + - reason + - status + - type + type: object + type: array + fileGroupDetail: + description: FileGroupDetail is the detail of these files + items: + properties: + datasource: + description: From defines the datasource which provides these + files + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + fileDetails: + description: FileDetails is the detail files + items: + properties: + checksum: + description: Checksum defines the checksum of the file + type: string + errMessage: + description: ErrMessage defines the error message + type: string + lastUpdateTime: + description: The last time this condition was updated. + format: date-time + type: string + path: + description: Path defines the detail path to get objects + from above datasource + type: string + phase: + description: Phase defines the process phase + type: string + type: object + type: array + type: object + type: array + observedGeneration: + description: ObservedGeneration is the last observed generation. 
+ format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_llms.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_llms.yaml index ad0e74be9..b8d31a81e 100644 --- a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_llms.yaml +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_llms.yaml @@ -35,7 +35,14 @@ spec: spec: description: LLMSpec defines the desired state of LLM properties: + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string displayName: + description: DisplayName defines datasource display name type: string endpoint: description: Enpoint defines connection info diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml new file mode 100644 index 000000000..c9148f3db --- /dev/null +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml @@ -0,0 +1,139 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: vectorstores.arcadia.kubeagi.k8s.com.cn +spec: + group: arcadia.kubeagi.k8s.com.cn + names: + kind: VectorStore + listKind: VectorStoreList + plural: vectorstores + singular: vectorstore + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.displayName + name: display-name + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: VectorStore is the Schema for the vectorstores API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: VectorStoreSpec defines the desired state of VectorStore + properties: + chroma: + description: Chroma defines the configuration of Chroma + properties: + distanceFunction: + type: string + type: object + creator: + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description + type: string + displayName: + description: DisplayName defines datasource display name + type: string + endpoint: + description: Enpoint defines connection info + properties: + authSecret: + description: AuthSecret if the chart repository requires auth + authentication, set the username and password to secret, with + the field user and password respectively. + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + insecure: + description: Insecure if the endpoint needs a secure connection + type: boolean + url: + description: URL chart repository address + type: string + type: object + type: object + status: + description: VectorStoreStatus defines the observed state of VectorStore + properties: + conditions: + description: Conditions of the resource. + items: + description: A Condition that may apply to a resource. + properties: + lastSuccessfulTime: + description: LastSuccessfulTime is repository Last Successful + Update Time + format: date-time + type: string + lastTransitionTime: + description: LastTransitionTime is the last time this condition + transitioned from one status to another. + format: date-time + type: string + message: + description: A Message containing details about this condition's + last transition from one status to another, if any. + type: string + reason: + description: A Reason for this condition's last transition from + one status to another. + type: string + status: + description: Status of this condition; is it currently True, + False, or Unknown + type: string + type: + description: Type of this condition. At most one of each condition + type may apply to a resource at any point in time. + type: string + required: + - lastTransitionTime + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_workers.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_workers.yaml index fc1d1f4bf..b9f7dff5f 100644 --- a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_workers.yaml +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_workers.yaml @@ -36,13 +36,14 @@ spec: description: WorkerSpec defines the desired state of Worker properties: creator: - description: Creator defines dataset creator(AUTO-FILLED by webhook) + description: Creator defines datasource creator (AUTO-FILLED by webhook) + type: string + description: + description: Description defines datasource description type: string displayName: - description: DisplayName defines dataset display name + description: DisplayName defines datasource display name type: string - required: - - displayName type: object status: description: WorkerStatus defines the observed state of Worker diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 8af4c452b..f9dc429a0 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -11,6 +11,8 @@ resources: - bases/arcadia.kubeagi.k8s.com.cn_versioneddatasets.yaml - bases/arcadia.kubeagi.k8s.com.cn_workers.yaml - bases/arcadia.kubeagi.k8s.com.cn_models.yaml +- bases/arcadia.kubeagi.k8s.com.cn_knowledgebases.yaml +- bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml #+kubebuilder:scaffold:crdkustomizeresource patchesStrategicMerge: @@ -23,6 +25,8 @@ patchesStrategicMerge: #- patches/webhook_in_embedders.yaml #- patches/webhook_in_datasets.yaml #- patches/webhook_in_versioneddatasets.yaml +#- patches/webhook_in_knowledgebases.yaml +#- patches/webhook_in_vectorstores.yaml #- patches/webhook_in_workers.yaml #- patches/webhook_in_models.yaml #+kubebuilder:scaffold:crdkustomizewebhookpatch 
@@ -38,6 +42,8 @@ patchesStrategicMerge: #- patches/cainjection_in_versioneddatasets.yaml #- patches/cainjection_in_workers.yaml #- patches/cainjection_in_models.yaml +#- patches/cainjection_in_knowledgebases.yaml +#- patches/cainjection_in_vectorstores.yaml #+kubebuilder:scaffold:crdkustomizecainjectionpatch # the following config is for teaching kustomize how to do kustomization for CRDs. diff --git a/config/crd/patches/cainjection_in_knowledgebases.yaml b/config/crd/patches/cainjection_in_knowledgebases.yaml new file mode 100644 index 000000000..2cf92743d --- /dev/null +++ b/config/crd/patches/cainjection_in_knowledgebases.yaml @@ -0,0 +1,7 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + name: knowledgebases.arcadia.kubeagi.k8s.com.cn diff --git a/config/crd/patches/cainjection_in_vectorstores.yaml b/config/crd/patches/cainjection_in_vectorstores.yaml new file mode 100644 index 000000000..740089dda --- /dev/null +++ b/config/crd/patches/cainjection_in_vectorstores.yaml @@ -0,0 +1,7 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + name: vectorstores.arcadia.kubeagi.k8s.com.cn diff --git a/config/crd/patches/webhook_in_knowledgebases.yaml b/config/crd/patches/webhook_in_knowledgebases.yaml new file mode 100644 index 000000000..c90a815e7 --- /dev/null +++ b/config/crd/patches/webhook_in_knowledgebases.yaml @@ -0,0 +1,16 @@ +# The following patch enables a conversion webhook for the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: knowledgebases.arcadia.kubeagi.k8s.com.cn +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + namespace: system + name: webhook-service + path: /convert + conversionReviewVersions: + - v1 diff --git a/config/crd/patches/webhook_in_vectorstores.yaml b/config/crd/patches/webhook_in_vectorstores.yaml new file mode 100644 index 000000000..7eba93e41 --- /dev/null +++ b/config/crd/patches/webhook_in_vectorstores.yaml @@ -0,0 +1,16 @@ +# The following patch enables a conversion webhook for the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vectorstores.arcadia.kubeagi.k8s.com.cn +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + namespace: system + name: webhook-service + path: /convert + conversionReviewVersions: + - v1 diff --git a/config/rbac/knowledgebase_editor_role.yaml b/config/rbac/knowledgebase_editor_role.yaml new file mode 100644 index 000000000..4a047bc50 --- /dev/null +++ b/config/rbac/knowledgebase_editor_role.yaml @@ -0,0 +1,24 @@ +# permissions for end users to edit knowledgebases. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: knowledgebase-editor-role +rules: +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases/status + verbs: + - get diff --git a/config/rbac/knowledgebase_viewer_role.yaml b/config/rbac/knowledgebase_viewer_role.yaml new file mode 100644 index 000000000..4b9bacbd2 --- /dev/null +++ b/config/rbac/knowledgebase_viewer_role.yaml @@ -0,0 +1,20 @@ +# permissions for end users to view knowledgebases. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: knowledgebase-viewer-role +rules: +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases + verbs: + - get + - list + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases/status + verbs: + - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 7d6d40f26..9bc665881 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -99,6 +99,32 @@ rules: - get - patch - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases/finalizers + verbs: + - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - knowledgebases/status + verbs: + - get + - patch + - update - apiGroups: - arcadia.kubeagi.k8s.com.cn resources: @@ -203,6 +229,32 @@ rules: - get - patch - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores/finalizers + verbs: + - update +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores/status + verbs: + - get + - patch + - update - apiGroups: - arcadia.kubeagi.k8s.com.cn resources: diff --git a/config/rbac/vectorstore_editor_role.yaml b/config/rbac/vectorstore_editor_role.yaml new file mode 100644 index 000000000..a8df7ea9f --- /dev/null +++ b/config/rbac/vectorstore_editor_role.yaml @@ -0,0 +1,24 @@ +# permissions for end users to edit vectorstores. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vectorstore-editor-role +rules: +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores/status + verbs: + - get diff --git a/config/rbac/vectorstore_viewer_role.yaml b/config/rbac/vectorstore_viewer_role.yaml new file mode 100644 index 000000000..f353de077 --- /dev/null +++ b/config/rbac/vectorstore_viewer_role.yaml @@ -0,0 +1,20 @@ +# permissions for end users to view vectorstores. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vectorstore-viewer-role +rules: +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores + verbs: + - get + - list + - watch +- apiGroups: + - arcadia.kubeagi.k8s.com.cn + resources: + - vectorstores/status + verbs: + - get diff --git a/config/samples/arcadia_v1alpha1_embedders.yaml b/config/samples/arcadia_v1alpha1_embedders.yaml index 84a585d8e..2e70e181c 100644 --- a/config/samples/arcadia_v1alpha1_embedders.yaml +++ b/config/samples/arcadia_v1alpha1_embedders.yaml @@ -2,20 +2,16 @@ apiVersion: v1 kind: Secret metadata: name: zhipuai + namespace: arcadia type: Opaque data: apiKey: "NGZjY2VjZWIxNjY2Y2QxMTgwOGMyMThkNmQ2MTk5NTAuVENYVXZhUUNXRnlJa3hCMw==" # replace this with your API key --- apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 -kind: Embedders +kind: Embedder metadata: - labels: - app.kubernetes.io/name: embedders - app.kubernetes.io/instance: embeddings-sample - app.kubernetes.io/part-of: arcadia - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/created-by: arcadia name: zhipuai-embedders-sample + namespace: arcadia spec: serviceType: "zhipuai" endpoint: diff --git a/config/samples/arcadia_v1alpha1_knowledgebase.yaml b/config/samples/arcadia_v1alpha1_knowledgebase.yaml new file mode 100644 index 000000000..41cc73bc1 --- /dev/null +++ b/config/samples/arcadia_v1alpha1_knowledgebase.yaml @@ -0,0 +1,26 @@ +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: KnowledgeBase +metadata: + name: knowledgebase-sample + namespace: arcadia +spec: + displayName: "测试 KnowledgeBase" + description: "测试 KnowledgeBase" + embedder: + apiGroup: arcadia.kubeagi.k8s.com.cn/v1alpha1 + kind: Embedders + name: zhipuai-embedders-sample + namespace: arcadia + vectorStore: + apiGroup: arcadia.kubeagi.k8s.com.cn/v1alpha1 + kind: VectorStores + name: chroma-sample + namespace: arcadia + fileGroups: + - datasource: + apiGroup: arcadia.kubeagi.k8s.com.cn/v1alpha1 + kind: Datasources + name: arcadia-local + namespace: arcadia + paths: + - example-test/knowledgebase-1.txt diff --git a/config/samples/arcadia_v1alpha1_vectorstore.yaml b/config/samples/arcadia_v1alpha1_vectorstore.yaml new file mode 100644 index 000000000..6f6ddc395 --- /dev/null +++ b/config/samples/arcadia_v1alpha1_vectorstore.yaml @@ -0,0 +1,12 @@ +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: VectorStore +metadata: + name: chroma-sample + namespace: arcadia +spec: + displayName: "测试 Chroma VectorStore,无密码" + description: "测试 VectorStore" + endpoint: + url: http://chroma-chromadb.arcadia.svc:8000 + chroma: + distanceFunction: cosine diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index b096ce099..4e72faac0 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -7,6 +7,8 @@ resources: - arcadia_v1alpha1_embedders.yaml - arcadia_v1alpha1_dataset.yaml - arcadia_v1alpha1_versioneddataset.yaml +- arcadia_v1alpha1_knowledgebase.yaml +- arcadia_v1alpha1_vectorstore.yaml - arcadia_v1alpha1_worker.yaml - arcadia_v1alpha1_model.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/controllers/datasource_controller.go b/controllers/datasource_controller.go index 2de8cfa6d..e1bbcdc0e 100644 --- a/controllers/datasource_controller.go +++ b/controllers/datasource_controller.go @@ -23,12 +23,12 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -37,7 +37,6 @@ import ( arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" "github.com/kubeagi/arcadia/pkg/config" "github.com/kubeagi/arcadia/pkg/datasource" - "github.com/kubeagi/arcadia/pkg/utils" ) // DatasourceReconciler reconciles a Datasource object @@ -67,22 +66,38 @@ func (r *DatasourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) instance := &arcadiav1alpha1.Datasource{} if err := r.Get(ctx, req.NamespacedName, instance); err != nil { - if errors.IsNotFound(err) { - // datasourcce has been deleted. - return reconcile.Result{}, nil + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. + logger.V(1).Info("Failed to get Datasource") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Add a finalizer.Then, we can define some operations which should + // occur before the Datasource to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(instance, arcadiav1alpha1.Finalizer); newAdded { + logger.Info("Try to add Finalizer for Datasource") + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to update Datasource to add finalizer, will try again later") + return ctrl.Result{}, err } - return reconcile.Result{}, err + logger.Info("Adding Finalizer for Datasource done") + return ctrl.Result{Requeue: true}, nil } - if instance.DeletionTimestamp != nil { - logger.Info("Delete datasource") - // remove the finalizer to complete the delete action - instance.Finalizers = utils.RemoveString(instance.Finalizers, arcadiav1alpha1.Finalizer) - err := r.Client.Update(ctx, instance) - if err != nil { - return reconcile.Result{}, fmt.Errorf("failed to update datasource finializer: %w", err) + // Check if the Datasource instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if instance.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(instance, arcadiav1alpha1.Finalizer) { + logger.Info("Performing Finalizer Operations for Datasource before delete CR") + // TODO perform the finalizer operations here, for example: remove data? + logger.Info("Removing Finalizer for Datasource after successfully performing the operations") + controllerutil.RemoveFinalizer(instance, arcadiav1alpha1.Finalizer) + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to remove finalizer for Datasource") + return ctrl.Result{}, err } - return reconcile.Result{}, nil + logger.Info("Remove Datasource done") + return ctrl.Result{}, nil } // initialize labels @@ -97,9 +112,9 @@ func (r *DatasourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) // check datasource if err := r.Checkdatasource(ctx, logger, instance); err != nil { // Update conditioned status - return reconcile.Result{}, err + return reconcile.Result{RequeueAfter: waitMedium}, err } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: waitLonger}, nil } // SetupWithManager sets up the controller with the Manager. 
@@ -116,18 +131,8 @@ func (r *DatasourceReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -func (r *DatasourceReconciler) Initialize(ctx context.Context, logger logr.Logger, instance *arcadiav1alpha1.Datasource) (bool, error) { +func (r *DatasourceReconciler) Initialize(ctx context.Context, logger logr.Logger, instance *arcadiav1alpha1.Datasource) (update bool, err error) { instanceDeepCopy := instance.DeepCopy() - l := len(instanceDeepCopy.Finalizers) - - var update bool - - instanceDeepCopy.Finalizers = utils.AddString(instanceDeepCopy.Finalizers, arcadiav1alpha1.Finalizer) - if l != len(instanceDeepCopy.Finalizers) { - logger.V(1).Info("Add Finalizer for datasource", "Finalizer", arcadiav1alpha1.Finalizer) - update = true - } - if instanceDeepCopy.Labels == nil { instanceDeepCopy.Labels = make(map[string]string) } diff --git a/controllers/embedder_controller.go b/controllers/embedder_controller.go index 8edf20a6a..121d7bee3 100644 --- a/controllers/embedder_controller.go +++ b/controllers/embedder_controller.go @@ -22,11 +22,11 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" @@ -63,20 +63,46 @@ func (r *EmbedderReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c logger.Info("Reconciling embedding resource") instance := &arcadiav1alpha1.Embedder{} - err := r.Get(ctx, req.NamespacedName, instance) - if err != nil { - if errors.IsNotFound(err) { - return ctrl.Result{}, nil + if err := r.Get(ctx, req.NamespacedName, instance); err != nil { + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. + logger.V(1).Info("Failed to get Embedder") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Add a finalizer.Then, we can define some operations which should + // occur before the Embedder to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(instance, arcadiav1alpha1.Finalizer); newAdded { + logger.Info("Try to add Finalizer for Embedder") + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to update Embedder to add finalizer, will try again later") + return ctrl.Result{}, err } - return ctrl.Result{}, err + logger.Info("Adding Finalizer for Embedder done") + return ctrl.Result{}, nil } - err = r.CheckEmbedder(ctx, logger, instance) - if err != nil { - return ctrl.Result{}, err + // Check if the Embedder instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if instance.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(instance, arcadiav1alpha1.Finalizer) { + logger.Info("Performing Finalizer Operations for Embedder before delete CR") + // TODO perform the finalizer operations here, for example: remove data? 
+ logger.Info("Removing Finalizer for Embedder after successfully performing the operations") + controllerutil.RemoveFinalizer(instance, arcadiav1alpha1.Finalizer) + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to remove finalizer for Embedder") + return ctrl.Result{}, err + } + logger.Info("Remove Embedder done") + return ctrl.Result{}, nil + } + + if err := r.CheckEmbedder(ctx, logger, instance); err != nil { + return ctrl.Result{RequeueAfter: waitMedium}, err } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: waitLonger}, nil } // SetupWithManager sets up the controller with the Manager. diff --git a/controllers/knowledgebase_controller.go b/controllers/knowledgebase_controller.go new file mode 100644 index 000000000..b19d0c90c --- /dev/null +++ b/controllers/knowledgebase_controller.go @@ -0,0 +1,450 @@ +/* +Copyright 2023 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "bytes" + "context" + "fmt" + "io" + "path/filepath" + "time" + + "github.com/go-logr/logr" + "github.com/minio/minio-go/v7" + "github.com/tmc/langchaingo/documentloaders" + langchainembeddings "github.com/tmc/langchaingo/embeddings" + "github.com/tmc/langchaingo/textsplitter" + "github.com/tmc/langchaingo/vectorstores/chroma" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" + "github.com/kubeagi/arcadia/pkg/config" + "github.com/kubeagi/arcadia/pkg/datasource" + "github.com/kubeagi/arcadia/pkg/embeddings" + zhipuaiembeddings "github.com/kubeagi/arcadia/pkg/embeddings/zhipuai" + "github.com/kubeagi/arcadia/pkg/llms/zhipuai" +) + +const ( + waitLonger = time.Minute + waitSmaller = time.Second * 3 + waitMedium = time.Second * 30 +) + +var ( + errNoDataSource = fmt.Errorf("no datasource") + errDataSourceTypeUnkonwn = fmt.Errorf("unknown datasource type") + errDataSourceNotReady = fmt.Errorf("datasource is not ready") + errEmbedderNotReady = fmt.Errorf("embedder is not ready") + errVectorStoreNotReady = fmt.Errorf("vector store is not ready") +) + +// KnowledgeBaseReconciler reconciles a KnowledgeBase object +type KnowledgeBaseReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=knowledgebases,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=knowledgebases/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=knowledgebases/finalizers,verbs=update +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=embedders,verbs=get;list;watch 
+//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=embedders/status,verbs=get +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=datasources,verbs=get;list;watch +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=datasources/status,verbs=get +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=vectorstores,verbs=get;list;watch +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=vectorstores/status,verbs=get + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile +func (r *KnowledgeBaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { + log := ctrl.LoggerFrom(ctx) + log.V(1).Info("Start KnowledgeBase Reconcile") + kb := &arcadiav1alpha1.KnowledgeBase{} + if err := r.Get(ctx, req.NamespacedName, kb); err != nil { + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. + log.V(1).Info("Failed to get KnowledgeBase") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + log = log.WithValues("Generation", kb.GetGeneration(), "ObservedGeneration", kb.Status.ObservedGeneration, "creator", kb.Spec.Creator) + log.V(1).Info("Get KnowledgeBase instance") + + // Add a finalizer.Then, we can define some operations which should + // occur before the KnowledgeBase to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(kb, arcadiav1alpha1.Finalizer); newAdded { + log.Info("Try to add Finalizer for KnowledgeBase") + if err = r.Update(ctx, kb); err != nil { + log.Error(err, "Failed to update KnowledgeBase to add finalizer, will try again later") + return ctrl.Result{}, err + } + log.Info("Adding Finalizer for KnowledgeBase done") + return ctrl.Result{}, nil + } + + // Check if the KnowledgeBase instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if kb.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(kb, arcadiav1alpha1.Finalizer) { + log.Info("Performing Finalizer Operations for KnowledgeBase before delete CR") + // TODO perform the finalizer operations here, for example: remove vectorstore data? + log.Info("Removing Finalizer for KnowledgeBase after successfully performing the operations") + controllerutil.RemoveFinalizer(kb, arcadiav1alpha1.Finalizer) + if err = r.Update(ctx, kb); err != nil { + log.Error(err, "Failed to remove finalizer for KnowledgeBase") + return ctrl.Result{}, err + } + log.Info("Remove KnowledgeBase done") + return ctrl.Result{}, nil + } + + kb, result, err = r.reconcile(ctx, log, kb) + + // Update status after reconciliation. 
+ if updateStatusErr := r.patchStatus(ctx, kb); updateStatusErr != nil { + log.Error(updateStatusErr, "unable to update status after reconciliation") + return ctrl.Result{Requeue: true}, updateStatusErr + } + + return result, err +} + +func (r *KnowledgeBaseReconciler) patchStatus(ctx context.Context, kb *arcadiav1alpha1.KnowledgeBase) error { + latest := &arcadiav1alpha1.KnowledgeBase{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(kb), latest); err != nil { + return err + } + patch := client.MergeFrom(latest.DeepCopy()) + latest.Status = kb.Status + return r.Client.Status().Patch(ctx, latest, patch, client.FieldOwner("knowledgebase-controller")) +} + +// SetupWithManager sets up the controller with the Manager. +func (r *KnowledgeBaseReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&arcadiav1alpha1.KnowledgeBase{}). + Complete(r) +} + +func (r *KnowledgeBaseReconciler) reconcile(ctx context.Context, log logr.Logger, kb *arcadiav1alpha1.KnowledgeBase) (*arcadiav1alpha1.KnowledgeBase, ctrl.Result, error) { + // Observe generation change + if kb.Status.ObservedGeneration != kb.Generation { + kb.Status.ObservedGeneration = kb.Generation + r.setCondition(kb, kb.InitCondition()) + if updateStatusErr := r.patchStatus(ctx, kb); updateStatusErr != nil { + log.Error(updateStatusErr, "unable to update status after generation update") + return kb, ctrl.Result{Requeue: true}, updateStatusErr + } + } + + if kb.Status.IsReady() { + return kb, ctrl.Result{}, nil + } + + embedderReq := kb.Spec.Embedder + vectorStoreReq := kb.Spec.VectorStore + fileGroupsReq := kb.Spec.FileGroups + if embedderReq == nil || vectorStoreReq == nil || len(fileGroupsReq) == 0 { + r.setCondition(kb, kb.PendingCondition("embedder or vectorstore or filegroups is not set")) + return kb, ctrl.Result{}, nil + } + + embedder := &arcadiav1alpha1.Embedder{} + if err := r.Get(ctx, types.NamespacedName{Name: kb.Spec.Embedder.Name, Namespace: kb.Spec.Embedder.GetNamespace()}, embedder); err != nil { + if errors.IsNotFound(err) { + r.setCondition(kb, kb.PendingCondition("embedder is not found")) + return kb, ctrl.Result{RequeueAfter: waitLonger}, nil + } + r.setCondition(kb, kb.ErrorCondition(err.Error())) + return kb, ctrl.Result{}, err + } + + vectorStore := &arcadiav1alpha1.VectorStore{} + if err := r.Get(ctx, types.NamespacedName{Name: kb.Spec.VectorStore.Name, Namespace: kb.Spec.VectorStore.GetNamespace()}, vectorStore); err != nil { + if errors.IsNotFound(err) { + r.setCondition(kb, kb.PendingCondition("vectorStore is not found")) + return kb, ctrl.Result{RequeueAfter: waitLonger}, nil + } + r.setCondition(kb, kb.ErrorCondition(err.Error())) + return kb, ctrl.Result{}, err + } + + errs := make([]error, 0) + for _, fileGroup := range kb.Spec.FileGroups { + if err := r.reconcileFileGroup(ctx, log, kb, vectorStore, embedder, fileGroup); err != nil { + log.Error(err, "Failed to reconcile FileGroup", "fileGroup", fileGroup) + errs = append(errs, err) + } + } + if err := utilerrors.NewAggregate(errs); err != nil { + r.setCondition(kb, kb.ErrorCondition(err.Error())) + return kb, ctrl.Result{RequeueAfter: waitLonger}, nil + } else { + for _, fileGroupDetail := range kb.Status.FileGroupDetail { + for _, fileDetail := range fileGroupDetail.FileDetails { + if fileDetail.ErrMessage != "" { + r.setCondition(kb, kb.ErrorCondition(fileDetail.ErrMessage)) + return kb, ctrl.Result{RequeueAfter: waitLonger}, nil + } + } + } + r.setCondition(kb, kb.ReadyCondition()) + } + + return
kb, ctrl.Result{}, nil +} + +func (r *KnowledgeBaseReconciler) setCondition(kb *arcadiav1alpha1.KnowledgeBase, condition ...arcadiav1alpha1.Condition) *arcadiav1alpha1.KnowledgeBase { + kb.Status.SetConditions(condition...) + return kb +} + +func (r *KnowledgeBaseReconciler) reconcileFileGroup(ctx context.Context, log logr.Logger, kb *arcadiav1alpha1.KnowledgeBase, vectorStore *arcadiav1alpha1.VectorStore, embedder *arcadiav1alpha1.Embedder, group arcadiav1alpha1.FileGroup) (err error) { + defer func() { + // group.Datasource may be nil here, so guard before reading its name + if err != nil && group.Datasource != nil { + err = fmt.Errorf("failed to reconcile FileGroup.DataSource: %s: %w", group.Datasource.Name, err) + } + }() + + if group.Datasource == nil { + return errNoDataSource + } + dataSource := &arcadiav1alpha1.Datasource{} + ns := group.Datasource.GetNamespace() + if err = r.Get(ctx, types.NamespacedName{Name: group.Datasource.Name, Namespace: ns}, dataSource); err != nil { + if errors.IsNotFound(err) { + return errNoDataSource + } else { + return err + } + } + if !dataSource.Status.IsReady() { + return errDataSourceNotReady + } + if dataSource.Spec.Type() == arcadiav1alpha1.DatasourceTypeUnknown { + return errDataSourceTypeUnkonwn + } + + if len(kb.Status.FileGroupDetail) == 0 { + kb.Status.FileGroupDetail = make([]arcadiav1alpha1.FileGroupDetail, 1) + kb.Status.FileGroupDetail[0].Init(group) + } + var fileGroupDetail *arcadiav1alpha1.FileGroupDetail + pathMap := make(map[string]*arcadiav1alpha1.FileDetails, 1) + for i, detail := range kb.Status.FileGroupDetail { + if detail.Datasource != nil && detail.Datasource.Name == dataSource.Name && detail.Datasource.GetNamespace() == dataSource.GetNamespace() { + fileGroupDetail = &kb.Status.FileGroupDetail[i] + for i, detail := range fileGroupDetail.FileDetails { + pathMap[detail.Path] = &fileGroupDetail.FileDetails[i] // FIXME: is this correct?
+ } + break + } + } + if fileGroupDetail == nil { + fileGroupDetail = &arcadiav1alpha1.FileGroupDetail{} + fileGroupDetail.Init(group) + kb.Status.FileGroupDetail = append(kb.Status.FileGroupDetail, *fileGroupDetail) + } + + var ds datasource.Datasource + info := &arcadiav1alpha1.OSS{} + switch dataSource.Spec.Type() { + case arcadiav1alpha1.DatasourceTypeLocal: + system, err := config.GetSystemDatasource(ctx, r.Client) + if err != nil { + return err + } + endpoint := system.Spec.Enpoint.DeepCopy() + if endpoint != nil && endpoint.AuthSecret != nil { + endpoint.AuthSecret.WithNameSpace(system.Namespace) + } + ds, err = datasource.NewLocal(ctx, r.Client, endpoint) + if err != nil { + return err + } + info = &arcadiav1alpha1.OSS{Bucket: dataSource.Namespace} + case arcadiav1alpha1.DatasourceTypeOSS: + endpoint := dataSource.Spec.Enpoint.DeepCopy() + // set auth secret's namespace to the datasource's namespace + if endpoint.AuthSecret != nil { + endpoint.AuthSecret.WithNameSpace(dataSource.Namespace) + } + ds, err = datasource.NewOSS(ctx, r.Client, endpoint) + if err != nil { + return err + } + info = dataSource.Spec.OSS.DeepCopy() + } + errs := make([]error, 0) + for _, path := range group.Paths { + fileDatail, ok := pathMap[path] + if !ok { + fileDatail = &arcadiav1alpha1.FileDetails{ + Path: path, + Checksum: "", + LastUpdateTime: metav1.Now(), + Phase: arcadiav1alpha1.FileProcessPhasePending, + ErrMessage: "", + } + fileGroupDetail.FileDetails = append(fileGroupDetail.FileDetails, *fileDatail) + } + info.Object = path + stat, err := ds.StatFile(ctx, info) + log.V(0).Info(fmt.Sprintf("raw StatFile:%#v", stat), "path", path) + if err != nil { + errs = append(errs, err) + fileDatail.UpdateErr(err) + continue + } + switch dataSource.Spec.Type() { + case arcadiav1alpha1.DatasourceTypeLocal, arcadiav1alpha1.DatasourceTypeOSS: + objectStat, ok := stat.(minio.ObjectInfo) + log.V(0).Info(fmt.Sprintf("minio StatFile:%#v", objectStat), "path", path) + if !ok { + err = fmt.Errorf("failed to convert stat to minio.ObjectInfo:%s", path) + errs = append(errs, err) + fileDatail.UpdateErr(err) + continue + } + if objectStat.ETag == fileDatail.Checksum { + fileDatail.LastUpdateTime = metav1.Now() + continue + } + fileDatail.Checksum = objectStat.ETag + tags, err := ds.GetTags(ctx, info) + if err != nil { + errs = append(errs, err) + fileDatail.UpdateErr(err) + continue + } + file, err := ds.ReadFile(ctx, info) + if err != nil { + errs = append(errs, err) + fileDatail.UpdateErr(err) + continue + } + defer file.Close() + if err = r.handleFile(ctx, log, file, info.Object, tags, kb, vectorStore, embedder); err != nil { + err = fmt.Errorf("failed to handle file:%s: %w", path, err) + errs = append(errs, err) + fileDatail.UpdateErr(err) + continue + } + fileDatail.UpdateErr(nil) + } + } + return utilerrors.NewAggregate(errs) +} + +func (r *KnowledgeBaseReconciler) handleFile(ctx context.Context, log logr.Logger, file io.ReadCloser, fileName string, tags map[string]string, kb *arcadiav1alpha1.KnowledgeBase, store *arcadiav1alpha1.VectorStore, embedder *arcadiav1alpha1.Embedder) (err error) { + if !embedder.Status.IsReady() { + return errEmbedderNotReady + } + if !store.Status.IsReady() { + return errVectorStoreNotReady + } + var em langchainembeddings.Embedder + switch embedder.Spec.ServiceType { // nolint: gocritic + case embeddings.ZhiPuAI: + apiKey, err := embedder.AuthAPIKey(ctx, r.Client) + if err != nil { + return err + } + em, err = zhipuaiembeddings.NewZhiPuAI( + 
zhipuaiembeddings.WithClient(*zhipuai.NewZhiPuAI(apiKey)), + ) + if err != nil { + return err + } + } + data, err := io.ReadAll(file) // TODO Load large files in pieces to save memory + // TODO Line or single line byte exceeds embedder limit + if err != nil { + return err + } + log.Info("file tags", "tags", tags) // TODO use tags to handle file + dataReader := bytes.NewReader(data) + var loader documentloaders.Loader + switch filepath.Ext(fileName) { + case ".txt": + loader = documentloaders.NewText(dataReader) + case ".csv": + loader = documentloaders.NewCSV(dataReader) + case ".html", ".htm": + loader = documentloaders.NewHTML(dataReader) + default: + loader = documentloaders.NewText(dataReader) + } + + // initialize text splitter + // var split textsplitter.TextSplitter + split := textsplitter.NewTokenSplitter( + textsplitter.WithChunkSize(300), + textsplitter.WithChunkOverlap(30), + ) + // TODO tags -> qa or fulltext + // switch { + // case "token": + // split = textsplitter.NewTokenSplitter( + // textsplitter.WithChunkSize(chunkSize), + // textsplitter.WithChunkOverlap(chunkOverlap), + // ) + // case "markdown": + // split = textsplitter.NewMarkdownTextSplitter( + // textsplitter.WithChunkSize(chunkSize), + // textsplitter.WithChunkOverlap(chunkOverlap), + // ) + //default: + // split = textsplitter.NewRecursiveCharacter( + // textsplitter.WithChunkSize(chunkSize), + // textsplitter.WithChunkOverlap(chunkOverlap), + // ) + //} + + documents, err := loader.LoadAndSplit(ctx, split) + if err != nil { + return err + } + + switch store.Spec.Type() { // nolint: gocritic + case arcadiav1alpha1.VectorStoreTypeChroma: + s, err := chroma.New( + chroma.WithChromaURL(store.Spec.Enpoint.URL), + chroma.WithDistanceFunction(store.Spec.Chroma.DistanceFunction), + chroma.WithNameSpace(kb.VectorStoreCollectionName()), + chroma.WithEmbedder(em), + ) + if err != nil { + return err + } + if err = s.AddDocuments(ctx, documents); err != nil { + return err + } + } + return nil +} diff --git a/controllers/llm_controller.go b/controllers/llm_controller.go index 28c3f6c97..b8e914930 100644 --- a/controllers/llm_controller.go +++ b/controllers/llm_controller.go @@ -22,16 +22,15 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/reconcile" arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" "github.com/kubeagi/arcadia/pkg/llms" @@ -65,23 +64,49 @@ func (r *LLMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R // Fetch the LLM instance instance := &arcadiav1alpha1.LLM{} - err := r.Get(ctx, req.NamespacedName, instance) - if err != nil { - if errors.IsNotFound(err) { - // LLM instance has been deleted. - return reconcile.Result{}, nil - } + if err := r.Get(ctx, req.NamespacedName, instance); err != nil { + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error.
+ logger.V(1).Info("Failed to get LLM") return ctrl.Result{}, client.IgnoreNotFound(err) } - err = r.CheckLLM(ctx, logger, instance) + // Add a finalizer.Then, we can define some operations which should + // occur before the LLM to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(instance, arcadiav1alpha1.Finalizer); newAdded { + logger.Info("Try to add Finalizer for LLM") + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to update LLM to add finalizer, will try again later") + return ctrl.Result{}, err + } + logger.Info("Adding Finalizer for LLM done") + return ctrl.Result{Requeue: true}, nil + } + + // Check if the LLM instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if instance.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(instance, arcadiav1alpha1.Finalizer) { + logger.Info("Performing Finalizer Operations for LLM before delete CR") + // TODO perform the finalizer operations here, for example: remove data? + logger.Info("Removing Finalizer for LLM after successfully performing the operations") + controllerutil.RemoveFinalizer(instance, arcadiav1alpha1.Finalizer) + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to remove finalizer for LLM") + return ctrl.Result{}, err + } + logger.Info("Remove LLM done") + return ctrl.Result{}, nil + } + + err := r.CheckLLM(ctx, logger, instance) if err != nil { logger.Error(err, "Failed to check LLM") // Update conditioned status - return ctrl.Result{}, err + return ctrl.Result{RequeueAfter: waitMedium}, err } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: waitLonger}, nil } // SetupWithManager sets up the controller with the Manager. diff --git a/controllers/model_controller.go b/controllers/model_controller.go index 185814a21..7636425bb 100644 --- a/controllers/model_controller.go +++ b/controllers/model_controller.go @@ -23,12 +23,12 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -37,7 +37,6 @@ import ( arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" "github.com/kubeagi/arcadia/pkg/config" "github.com/kubeagi/arcadia/pkg/datasource" - "github.com/kubeagi/arcadia/pkg/utils" ) // ModelReconciler reconciles a Model object @@ -62,33 +61,46 @@ type ModelReconciler struct { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile func (r *ModelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - logger := log.FromContext(ctx) logger.Info("Starting model reconcile") instance := &arcadiav1alpha1.Model{} if err := r.Get(ctx, req.NamespacedName, instance); err != nil { - if errors.IsNotFound(err) { - // model has been deleted. - return reconcile.Result{}, nil + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. 
+ logger.V(1).Info("Failed to get Model") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Add a finalizer.Then, we can define some operations which should + // occur before the Model to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(instance, arcadiav1alpha1.Finalizer); newAdded { + logger.Info("Try to add Finalizer for Model") + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to update Model to add finalizer, will try again later") + return ctrl.Result{}, err } - return reconcile.Result{}, err + logger.Info("Adding Finalizer for Model done") + return ctrl.Result{Requeue: true}, nil } - if instance.DeletionTimestamp != nil { - logger.Info("Delete model") + // Check if the Model instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if instance.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(instance, arcadiav1alpha1.Finalizer) { + logger.Info("Performing Finalizer Operations for Model before delete CR") // remove all model files from storage service if err := r.RemoveModel(ctx, logger, instance); err != nil { return reconcile.Result{}, fmt.Errorf("failed to remove model: %w", err) } - // remove the finalizer to complete the delete action - instance.Finalizers = utils.RemoveString(instance.Finalizers, arcadiav1alpha1.Finalizer) - err := r.Client.Update(ctx, instance) - if err != nil { - return reconcile.Result{}, fmt.Errorf("failed to update model finializer: %w", err) + logger.Info("Removing Finalizer for Model after successfully performing the operations") + controllerutil.RemoveFinalizer(instance, arcadiav1alpha1.Finalizer) + if err := r.Update(ctx, instance); err != nil { + logger.Error(err, "Failed to remove finalizer for Model") + return ctrl.Result{}, err } - return reconcile.Result{}, nil + logger.Info("Remove Model done") + return ctrl.Result{}, nil } // initialize labels @@ -102,10 +114,10 @@ func (r *ModelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl if err := r.CheckModel(ctx, logger, instance); err != nil { // Update conditioned status - return reconcile.Result{}, err + return reconcile.Result{RequeueAfter: waitMedium}, err } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: waitLonger}, nil } // SetupWithManager sets up the controller with the Manager. 
@@ -121,17 +133,8 @@ func (r *ModelReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -func (r *ModelReconciler) Initialize(ctx context.Context, logger logr.Logger, instance *arcadiav1alpha1.Model) (bool, error) { +func (r *ModelReconciler) Initialize(ctx context.Context, logger logr.Logger, instance *arcadiav1alpha1.Model) (update bool, err error) { instanceDeepCopy := instance.DeepCopy() - l := len(instanceDeepCopy.Finalizers) - - var update bool - - instanceDeepCopy.Finalizers = utils.AddString(instanceDeepCopy.Finalizers, arcadiav1alpha1.Finalizer) - if l != len(instanceDeepCopy.Finalizers) { - logger.V(1).Info("Add Finalizer for model", "Finalizer", arcadiav1alpha1.Finalizer) - update = true - } // Initialize Labels if instanceDeepCopy.Labels == nil { diff --git a/controllers/prompt_controller.go b/controllers/prompt_controller.go index ed3dc6f94..355f4fd5e 100644 --- a/controllers/prompt_controller.go +++ b/controllers/prompt_controller.go @@ -22,13 +22,13 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -67,11 +67,38 @@ func (r *PromptReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr // Prompt engineering prompt := &arcadiav1alpha1.Prompt{} if err := r.Get(ctx, req.NamespacedName, prompt); err != nil { - if errors.IsNotFound(err) { - // Prompt has been deleted. - return reconcile.Result{}, nil + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. + logger.V(1).Info("Failed to get Prompt") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Add a finalizer.Then, we can define some operations which should + // occur before the Prompt to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(prompt, arcadiav1alpha1.Finalizer); newAdded { + logger.Info("Try to add Finalizer for Prompt") + if err := r.Update(ctx, prompt); err != nil { + logger.Error(err, "Failed to update Prompt to add finalizer, will try again later") + return ctrl.Result{}, err } - return reconcile.Result{}, err + logger.Info("Adding Finalizer for Prompt done") + return ctrl.Result{Requeue: true}, nil + } + + // Check if the Prompt instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if prompt.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(prompt, arcadiav1alpha1.Finalizer) { + logger.Info("Performing Finalizer Operations for Prompt before delete CR") + // TODO perform the finalizer operations here, for example: remove data? 
+ logger.Info("Removing Finalizer for Prompt after successfully performing the operations") + controllerutil.RemoveFinalizer(prompt, arcadiav1alpha1.Finalizer) + if err := r.Update(ctx, prompt); err != nil { + logger.Error(err, "Failed to remove finalizer for Prompt") + return ctrl.Result{}, err + } + logger.Info("Remove Prompt done") + return ctrl.Result{}, nil } err := r.CallLLM(ctx, logger, prompt) diff --git a/controllers/vectorstore_controller.go b/controllers/vectorstore_controller.go new file mode 100644 index 000000000..3ab695455 --- /dev/null +++ b/controllers/vectorstore_controller.go @@ -0,0 +1,149 @@ +/* +Copyright 2023 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + + "github.com/go-logr/logr" + "github.com/tmc/langchaingo/vectorstores/chroma" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" +) + +// VectorStoreReconciler reconciles a VectorStore object +type VectorStoreReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=vectorstores,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=vectorstores/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=arcadia.kubeagi.k8s.com.cn,resources=vectorstores/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the VectorStore object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile +func (r *VectorStoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.V(1).Info("Start VectorStore Reconcile") + vs := &arcadiav1alpha1.VectorStore{} + if err := r.Get(ctx, req.NamespacedName, vs); err != nil { + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. + log.V(1).Info("Failed to get VectorStore") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + log.V(1).Info("Get VectorStore instance") + + // Add a finalizer.Then, we can define some operations which should + // occur before the KnowledgeBase to be deleted. 
+ // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(vs, arcadiav1alpha1.Finalizer); newAdded { + log.Info("Try to add Finalizer for VectorStore") + if err := r.Update(ctx, vs); err != nil { + log.Error(err, "Failed to update VectorStore to add finalizer, will try again later") + return ctrl.Result{}, err + } + log.Info("Adding Finalizer for VectorStore done") + return ctrl.Result{}, nil + } + + // Check if the VectorStore instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if vs.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(vs, arcadiav1alpha1.Finalizer) { + log.Info("Performing Finalizer Operations for VectorStore before delete CR") + // TODO perform the finalizer operations here, for example: remove vectorstore data? + log.Info("Removing Finalizer for VectorStore after successfully performing the operations") + controllerutil.RemoveFinalizer(vs, arcadiav1alpha1.Finalizer) + if err := r.Update(ctx, vs); err != nil { + log.Error(err, "Failed to remove finalizer for VectorStore") + return ctrl.Result{}, err + } + log.Info("Remove VectorStore done") + return ctrl.Result{}, nil + } + + if vs.Labels == nil { + vs.Labels = make(map[string]string) + } + + currentType := string(vs.Spec.Type()) + if v := vs.Labels[arcadiav1alpha1.LabelVectorStoreType]; v != currentType { + vs.Labels[arcadiav1alpha1.LabelVectorStoreType] = currentType + return reconcile.Result{}, r.Update(ctx, vs) + } + + if err := r.CheckVectorStore(ctx, log, vs); err != nil { + return reconcile.Result{RequeueAfter: waitMedium}, err + } + + return ctrl.Result{RequeueAfter: waitLonger}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *VectorStoreReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&arcadiav1alpha1.VectorStore{}). + Complete(r) +} + +func (r *VectorStoreReconciler) CheckVectorStore(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1.VectorStore) (err error) { + log.Info("check vectorstore") + switch vs.Spec.Type() { + case arcadiav1alpha1.VectorStoreTypeChroma: + _, err := chroma.New( + chroma.WithOpenAiAPIKey("fake_key_just_for_chroma_heartbeat"), + chroma.WithChromaURL(vs.Spec.Enpoint.URL), + chroma.WithDistanceFunction(vs.Spec.Chroma.DistanceFunction), + ) + if err != nil { + r.setCondition(vs, vs.ErrorCondition(err.Error())) + } else { + r.setCondition(vs, vs.ReadyCondition()) + } + default: + r.setCondition(vs, vs.ErrorCondition("unsupported vectorstore type")) + } + + return r.patchStatus(ctx, vs) +} +func (r *VectorStoreReconciler) setCondition(vs *arcadiav1alpha1.VectorStore, condition ...arcadiav1alpha1.Condition) *arcadiav1alpha1.VectorStore { + vs.Status.SetConditions(condition...) 
+ return vs +} + +func (r *VectorStoreReconciler) patchStatus(ctx context.Context, vs *arcadiav1alpha1.VectorStore) error { + latest := &arcadiav1alpha1.VectorStore{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(vs), latest); err != nil { + return err + } + patch := client.MergeFrom(latest.DeepCopy()) + latest.Status = vs.Status + return r.Client.Status().Patch(ctx, latest, patch, client.FieldOwner("vectorstore-controller")) +} diff --git a/doc/development.md b/doc/development.md index f8846ddca..ddcdcd68d 100644 --- a/doc/development.md +++ b/doc/development.md @@ -19,3 +19,55 @@ operator-sdk create api --resource --controller --namespaced=true --group arcadi ```bash make generate && make manifests ``` + +### Base controller reconcile + +`// Note:` For ease of understanding only, it does not need to be written to the source file + +```go +func (r *KnowledgeBaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { + logger := ctrl.LoggerFrom(ctx) + logger.V(1).Info("Start KnowledgeBase Reconcile") // Note: V(1) means debug log level + kb := &arcadiav1alpha1.KnowledgeBase{} + if err := r.Get(ctx, req.NamespacedName, kb); err != nil { + // There's no need to requeue if the resource no longer exists. + // Otherwise, we'll be requeued implicitly because we return an error. + logger.V(1).Info("Failed to get KnowledgeBase") + return ctrl.Result{}, client.IgnoreNotFound(err) + } + logger = logger.WithValues("Generation", kb.GetGeneration(), "ObservedGeneration", kb.Status.ObservedGeneration, "creator", kb.Spec.Creator) // Note: add log value is optional + logger.V(1).Info("Get KnowledgeBase instance") + + // Add a finalizer.Then, we can define some operations which should + // occur before the KnowledgeBase to be deleted. + // More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers + if newAdded := controllerutil.AddFinalizer(kb, arcadiav1alpha1.Finalizer); newAdded { + logger.Info("Try to add Finalizer for KnowledgeBase") + if err = r.Update(ctx, kb); err != nil { + logger.Error(err, "Failed to update KnowledgeBase to add finalizer, will try again later") + return ctrl.Result{}, err + } + logger.Info("Adding Finalizer for KnowledgeBase done") + return ctrl.Result{Requeue: true}, nil + } + + // Check if the KnowledgeBase instance is marked to be deleted, which is + // indicated by the deletion timestamp being set. + if kb.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(kb, arcadiav1alpha1.Finalizer) { + logger.Info("Performing Finalizer Operations for KnowledgeBase before delete CR") + // TODO perform the finalizer operations here, for example: remove vectorstore data? 
+ logger.Info("Removing Finalizer for KnowledgeBase after successfully performing the operations") + controllerutil.RemoveFinalizer(kb, arcadiav1alpha1.Finalizer) + if err = r.Update(ctx, kb); err != nil { + logger.Error(err, "Failed to remove finalizer for KnowledgeBase") + return ctrl.Result{}, err + } + logger.Info("Remove KnowledgeBase done") + return ctrl.Result{}, nil + } + + // Note: do you logic here + + return result, err +} +``` diff --git a/graphql-server/go-server/pkg/datasource/datasource.go b/graphql-server/go-server/pkg/datasource/datasource.go index 7c9799ebc..38f808bc5 100644 --- a/graphql-server/go-server/pkg/datasource/datasource.go +++ b/graphql-server/go-server/pkg/datasource/datasource.go @@ -79,7 +79,9 @@ func CreateDatasource(ctx context.Context, c dynamic.Interface, name, namespace, APIVersion: v1alpha1.GroupVersion.String(), }, Spec: v1alpha1.DatasourceSpec{ - DiplayName: displayname, + CommonSpec: v1alpha1.CommonSpec{ + DisplayName: displayname, + }, Enpoint: &v1alpha1.Endpoint{ URL: url, AuthSecret: &v1alpha1.TypedObjectReference{ @@ -104,7 +106,9 @@ func CreateDatasource(ctx context.Context, c dynamic.Interface, name, namespace, APIVersion: v1alpha1.GroupVersion.String(), }, Spec: v1alpha1.DatasourceSpec{ - DiplayName: displayname, + CommonSpec: v1alpha1.CommonSpec{ + DisplayName: displayname, + }, }, } } diff --git a/main.go b/main.go index e8de0c0fd..8a83353f7 100644 --- a/main.go +++ b/main.go @@ -33,6 +33,7 @@ import ( arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" "github.com/kubeagi/arcadia/controllers" + "github.com/kubeagi/arcadia/pkg/utils" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. @@ -187,6 +188,20 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "Model") os.Exit(1) } + if err = (&controllers.KnowledgeBaseReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "KnowledgeBase") + os.Exit(1) + } + if err = (&controllers.VectorStoreReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "VectorStore") + os.Exit(1) + } //+kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { @@ -205,6 +220,10 @@ func main() { _ = mgr.AddMetricsExtraHandler("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol)) _ = mgr.AddMetricsExtraHandler("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) } + if err = utils.SetSelfNamespace(); err != nil { + setupLog.Error(err, "unable to get self namespace") + os.Exit(1) + } setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { diff --git a/pkg/config/config.go b/pkg/config/config.go index 9da09a7fd..a2ecbc6d9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -19,7 +19,6 @@ package config import ( "context" "fmt" - "os" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/yaml" @@ -27,20 +26,17 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" arcadiav1alpha1 "github.com/kubeagi/arcadia/api/v1alpha1" + "github.com/kubeagi/arcadia/pkg/utils" ) const ( EnvConfigKey = "DEFAULT_CONFIG" EnvConfigDefaultValue = "arcadia-config" - EnvNamespaceKey = "POD_NAMESPACE" - - InClusterNamespacePath = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" ) var ( - 
ErrNoConfigEnv = fmt.Errorf("env:%s is not found", EnvConfigKey) - ErrNoConfig = fmt.Errorf("config in configmap is empty") - ErrNoNamespaceEnv = fmt.Errorf("not in cluster and env:%s is not found", EnvNamespaceKey) + ErrNoConfigEnv = fmt.Errorf("env:%s is not found", EnvConfigKey) + ErrNoConfig = fmt.Errorf("config in configmap is empty") ) func GetSystemDatasource(ctx context.Context, c client.Client) (*arcadiav1alpha1.Datasource, error) { @@ -53,10 +49,7 @@ func GetSystemDatasource(ctx context.Context, c client.Client) (*arcadiav1alpha1 if config.SystemDatasource.Namespace != nil { namespace = *config.SystemDatasource.Namespace } else { - namespace, err = GetSelfNamespace() - if err != nil { - return nil, err - } + namespace = utils.GetSelfNamespace() } source := &arcadiav1alpha1.Datasource{} if err = c.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, source); err != nil { @@ -70,10 +63,7 @@ func GetConfig(ctx context.Context, c client.Client) (config *Config, err error) if cmName == "" { return nil, ErrNoConfigEnv } - cmNamespace, err := GetSelfNamespace() - if err != nil { - return nil, err - } + cmNamespace := utils.GetSelfNamespace() cm := &corev1.ConfigMap{} if err = c.Get(ctx, client.ObjectKey{Name: cmName, Namespace: cmNamespace}, cm); err != nil { return nil, err @@ -87,24 +77,3 @@ func GetConfig(ctx context.Context, c client.Client) (config *Config, err error) } return config, nil } - -func GetSelfNamespace() (string, error) { - // Check whether the namespace file exists. - // If not, we are not running in cluster so can't guess the namespace. - if _, err := os.Stat(InClusterNamespacePath); os.IsNotExist(err) { - operatorNamespace := os.Getenv(EnvNamespaceKey) - if operatorNamespace == "" { - return "", ErrNoNamespaceEnv - } - return operatorNamespace, nil - } else if err != nil { - return "", fmt.Errorf("error checking namespace file: %w", err) - } - - // Load the namespace file and return its content - namespace, err := os.ReadFile(InClusterNamespacePath) - if err != nil { - return "", fmt.Errorf("error reading namespace file: %w", err) - } - return string(namespace), nil -} diff --git a/pkg/datasource/datasource.go b/pkg/datasource/datasource.go index 8b4a2c4a2..91b4b50a3 100644 --- a/pkg/datasource/datasource.go +++ b/pkg/datasource/datasource.go @@ -19,6 +19,7 @@ package datasource import ( "context" "errors" + "io" "strings" "github.com/minio/minio-go/v7" @@ -35,11 +36,15 @@ var ( ErrBucketNotProvided = errors.New("no bucket provided") ErrOSSNoSuchBucket = errors.New("no such bucket") ErrOSSNoSuchObject = errors.New("no such object in bucket") + ErrOSSNoConfig = errors.New("no bucket or object config") ) type Datasource interface { Stat(ctx context.Context, info any) error Remove(ctx context.Context, info any) error + ReadFile(ctx context.Context, info any) (io.ReadCloser, error) + StatFile(ctx context.Context, info any) (any, error) + GetTags(ctx context.Context, info any) (map[string]string, error) } var _ Datasource = (*Unknown)(nil) @@ -59,6 +64,18 @@ func (u *Unknown) Remove(ctx context.Context, info any) error { return ErrUnknowDatasourceType } +func (u *Unknown) ReadFile(ctx context.Context, info any) (io.ReadCloser, error) { + return nil, ErrUnknowDatasourceType +} + +func (u *Unknown) StatFile(ctx context.Context, info any) (any, error) { + return nil, ErrUnknowDatasourceType +} + +func (u *Unknown) GetTags(ctx context.Context, info any) (map[string]string, error) { + return nil, ErrUnknowDatasourceType +} + var _ Datasource = (*Local)(nil) // Local 
is a special datasource which use the system datasource as oss to store user-uploaded local files @@ -94,6 +111,18 @@ func (local *Local) Remove(ctx context.Context, info any) error { return local.oss.Remove(ctx, info) } +func (local *Local) ReadFile(ctx context.Context, info any) (io.ReadCloser, error) { + return local.oss.ReadFile(ctx, info) +} + +func (local *Local) StatFile(ctx context.Context, info any) (any, error) { + return local.oss.StatFile(ctx, info) +} + +func (local *Local) GetTags(ctx context.Context, info any) (map[string]string, error) { + return local.oss.GetTags(ctx, info) +} + var _ Datasource = (*OSS)(nil) // OSS is a wrapper to object storage service @@ -101,6 +130,11 @@ type OSS struct { *minio.Client } +var ( + ossDefaultGetOpt = minio.GetObjectOptions{} + ossDefaultGetTagOpt = minio.GetObjectTaggingOptions{} +) + func NewOSS(ctx context.Context, c client.Client, endpoint *v1alpha1.Endpoint) (*OSS, error) { var accessKeyID, secretAccessKey string if endpoint.AuthSecret != nil { @@ -137,7 +171,7 @@ func (oss *OSS) Stat(ctx context.Context, info any) error { } ossInfo, ok := info.(*v1alpha1.OSS) if !ok { - return errors.New("invalid check info for OSS") + return ErrOSSNoConfig } return oss.statObject(ctx, ossInfo) @@ -186,3 +220,42 @@ func (oss *OSS) statObject(ctx context.Context, ossInfo *v1alpha1.OSS) error { return nil } + +func (oss *OSS) ReadFile(ctx context.Context, info any) (io.ReadCloser, error) { + ossInfo, err := oss.preCheck(info) + if err != nil { + return nil, err + } + return oss.Client.GetObject(ctx, ossInfo.Bucket, ossInfo.Object, ossDefaultGetOpt) +} + +func (oss *OSS) StatFile(ctx context.Context, info any) (any, error) { + ossInfo, err := oss.preCheck(info) + if err != nil { + return nil, err + } + return oss.Client.StatObject(ctx, ossInfo.Bucket, ossInfo.Object, ossDefaultGetOpt) +} + +func (oss *OSS) GetTags(ctx context.Context, info any) (map[string]string, error) { + ossInfo, err := oss.preCheck(info) + if err != nil { + return nil, err + } + tags, err := oss.Client.GetObjectTagging(ctx, ossInfo.Bucket, ossInfo.Object, ossDefaultGetTagOpt) + if err != nil { + return nil, err + } + return tags.ToMap(), nil +} + +func (oss *OSS) preCheck(info any) (*v1alpha1.OSS, error) { + if info == nil { + return nil, ErrOSSNoConfig + } + ossInfo, ok := info.(*v1alpha1.OSS) + if !ok || ossInfo.Bucket == "" || ossInfo.Object == "" { + return nil, ErrOSSNoConfig + } + return ossInfo, nil +} diff --git a/pkg/utils/namespace.go b/pkg/utils/namespace.go new file mode 100644 index 000000000..65bbc9d2b --- /dev/null +++ b/pkg/utils/namespace.go @@ -0,0 +1,61 @@ +/* +Copyright 2023 The KubeAGI Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package utils
+
+import (
+    "fmt"
+    "os"
+)
+
+const (
+    EnvNamespaceKey = "POD_NAMESPACE"
+
+    InClusterNamespacePath = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
+)
+
+var (
+    ErrNoNamespaceEnv = fmt.Errorf("not in cluster and env:%s is not found", EnvNamespaceKey)
+)
+
+var selfNamespace string
+
+func GetSelfNamespace() string {
+    return selfNamespace
+}
+
+func SetSelfNamespace() error {
+    // Check whether the namespace file exists.
+    // If not, we are not running in cluster so can't guess the namespace.
+    if _, err := os.Stat(InClusterNamespacePath); os.IsNotExist(err) {
+        operatorNamespace := os.Getenv(EnvNamespaceKey)
+        if operatorNamespace == "" {
+            return ErrNoNamespaceEnv
+        }
+        selfNamespace = operatorNamespace
+        return nil
+    } else if err != nil {
+        return fmt.Errorf("error checking namespace file: %w", err)
+    }
+
+    // Load the namespace file and store its content
+    namespace, err := os.ReadFile(InClusterNamespacePath)
+    if err != nil {
+        return fmt.Errorf("error reading namespace file: %w", err)
+    }
+    selfNamespace = string(namespace)
+    return nil
+}
diff --git a/tests/example-test.sh b/tests/example-test.sh
index a23fb0a27..deee18601 100755
--- a/tests/example-test.sh
+++ b/tests/example-test.sh
@@ -179,7 +179,7 @@ kind load docker-image controller:example-e2e --name=$KindName
 info "3. install arcadia"
 kubectl create namespace arcadia
-helm install -narcadia arcadia charts/arcadia --set deployment.image=controller:example-e2e
+helm install -narcadia arcadia charts/arcadia --set deployment.image=controller:example-e2e --wait --timeout $HelmTimeout

 info "4. check system datasource arcadia-minio(system datasource)"
 waitCRDStatusReady "Datasource" "arcadia" "arcadia-minio"
@@ -193,4 +193,53 @@ if [[ $datasourceType != "local" ]]; then
     exit 1
 fi

+info "6. create and verify vectorstore"
+info "6.1. helm install chroma"
+helm repo add chroma https://amikos-tech.github.io/chromadb-chart/
+helm repo update chroma
+helm install -narcadia chroma chroma/chromadb --set service.type=ClusterIP --set chromadb.auth.enabled=false --wait --timeout $HelmTimeout
+info "6.2. verify chroma vectorstore status"
+kubectl apply -f config/samples/arcadia_v1alpha1_vectorstore.yaml
+waitCRDStatusReady "VectorStore" "arcadia" "chroma-sample"
+
+info "7. create and verify knowledgebase"
+info "7.1. upload a test file to the system datasource"
+bucket=$(kubectl get datasource -n arcadia arcadia-minio -o json | jq -r .spec.oss.bucket)
+s3_key=$(kubectl get secrets -n arcadia arcadia-minio -o json | jq -r ".data.rootUser" | base64 --decode)
+s3_secret=$(kubectl get secrets -n arcadia arcadia-minio -o json | jq -r ".data.rootPassword" | base64 --decode)
+resource="/${bucket}/example-test/knowledgebase-1.txt"
+content_type="application/octet-stream"
+date=$(date -R)
+_signature="PUT\n\n${content_type}\n${date}\n${resource}"
+signature=$(echo -en ${_signature} | openssl sha1 -hmac ${s3_secret} -binary | base64)
+kubectl port-forward -n arcadia svc/arcadia-minio 9000:9000 >/dev/null 2>&1 &
+minio_pid=$!
+info "port-forward minio in pid: $minio_pid"
+sleep 3
+curl -X PUT -T "tests/knowledgebase-1.txt" \
+    -H "Host: 127.0.0.1:9000" \
+    -H "Date: ${date}" \
+    -H "Content-Type: ${content_type}" \
+    -H "Authorization: AWS ${s3_key}:${signature}" \
+    http://127.0.0.1:9000${resource}
+info "7.2. create embedder and wait until it is ready"
+kubectl apply -f config/samples/arcadia_v1alpha1_embedders.yaml
+waitCRDStatusReady "Embedders" "arcadia" "zhipuai-embedders-sample"
+info "7.3. create knowledgebase and wait until it is ready"
+kubectl apply -f config/samples/arcadia_v1alpha1_knowledgebase.yaml
+waitCRDStatusReady "KnowledgeBase" "arcadia" "knowledgebase-sample"
+info "7.4. check that the vectorstore has data"
+kubectl port-forward -n arcadia svc/chroma-chromadb 8000:8000 >/dev/null 2>&1 &
+chroma_pid=$!
+info "port-forward chroma in pid: $chroma_pid"
+sleep 3
+collection_test_id=$(curl http://127.0.0.1:8000/api/v1/collections/arcadia_knowledgebase-sample | jq -r .id)
+collection_test_count=$(curl http://127.0.0.1:8000/api/v1/collections/${collection_test_id}/count)
+if [[ $collection_test_count =~ ^[0-9]+$ ]]; then
+    info "collection test count: $collection_test_count"
+else
+    echo "$collection_test_count is not a number"
+    exit 1
+fi
+
 info "all finished! ✅"
diff --git a/tests/knowledgebase-1.txt b/tests/knowledgebase-1.txt
new file mode 100644
index 000000000..5732e758c
--- /dev/null
+++ b/tests/knowledgebase-1.txt
@@ -0,0 +1,11 @@
+## What is Arcadia?
+
+**Arcadia** comes from [Greek mythology](https://www.greekmythology.com/Myths/Places/Arcadia/arcadia.html) (a tranquil and idyllic region, representing harmony, serenity, and natural beauty). We aim to help everyone find a more perfect integration between humans and AI.
+
+To achieve this goal, we provide this one-stop LLMOps solution:
+
+- Dataset Management: storage/real-time data, multimodal, pre-processing, vectorization
+- Models Management: local/online LLMs (development, training, deployment), inference acceleration
+- Application Management: development, optimization, deployment with visual editor
+
+Furthermore, we can easily host **Arcadia** on any Kubernetes cluster as production ready by integrating [kubebb](https://github.com/kubebb) (Kubernetes building blocks),