Merge pull request #505 from bjwswang/evaluation

feat: define CRD RAG under group evaluations
kubeagi · Jan 9, 2024 · 307e26d · 307e26d
2 parents a0639e8 + 03a351a
commit 307e26d
Show file tree

Hide file tree

Showing 35 changed files with 993 additions and 18 deletions.
diff --git a/PROJECT b/PROJECT
@@ -2,10 +2,10 @@ componentConfig: true
 domain: kubeagi.k8s.com.cn
 layout:
 - go.kubebuilder.io/v3
+multigroup: true
 plugins:
   manifests.sdk.operatorframework.io/v2: {}
   scorecard.sdk.operatorframework.io/v2: {}
-multigroup: true
 projectName: arcadia
 repo: github.com/kubeagi/arcadia
 resources:
@@ -142,4 +142,13 @@ resources:
   kind: Prompt
   path: github.com/kubeagi/arcadia/api/app-node/prompt/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: arcadia.kubeagi.k8s.com.cn
+  group: evaluation
+  kind: RAG
+  path: github.com/kubeagi/arcadia/api/evaluation/v1alpha1
+  version: v1alpha1
 version: "3"
diff --git a/api/evaluation/v1alpha1/common.go b/api/evaluation/v1alpha1/common.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2024 KubeAGI.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+type MetricsKind string // MetricsKind names one of the ragas evaluation metrics listed below.
+
+const (
+	// AnswerRelevancy in ragas https://docs.ragas.io/en/stable/concepts/metrics/answer_relevance.html
+	// Scores the relevancy of the answer according to the given question.
+	AnswerRelevancy MetricsKind = "answer_relevancy"
+
+	// AnswerSimilarity in ragas https://docs.ragas.io/en/stable/concepts/metrics/semantic_similarity.html
+	// Scores the semantic similarity of the ground truth with the generated answer.
+	AnswerSimilarity MetricsKind = "answer_similarity"
+
+	// AnswerCorrectness in ragas https://docs.ragas.io/en/stable/concepts/metrics/answer_correctness.html
+	// Measures answer correctness compared to the ground truth as a weighted combination of
+	// - factuality
+	// - semantic similarity
+	AnswerCorrectness MetricsKind = "answer_correctness"
+
+	// Faithfulness in ragas https://docs.ragas.io/en/stable/concepts/metrics/faithfulness.html
+	// Scores the factual consistency of the generated answer against the given context.
+	Faithfulness MetricsKind = "faithfulness"
+
+	// ContextPrecision in ragas https://docs.ragas.io/en/stable/concepts/metrics/context_precision.html
+	// Average Precision is a metric that evaluates whether all of the relevant items selected by the model are ranked higher or not.
+	ContextPrecision MetricsKind = "context_precision"
+
+	// ContextRelevancy in ragas https://docs.ragas.io/en/stable/concepts/metrics/context_relevancy.html
+	// Gauges the relevancy of the retrieved context.
+	ContextRelevancy MetricsKind = "context_relevancy"
+
+	// ContextRecall in ragas https://docs.ragas.io/en/stable/concepts/metrics/context_recall.html
+	// Estimates context recall by estimating TP and FN using the annotated answer and the retrieved context.
+	ContextRecall MetricsKind = "context_recall"
+
+	// AspectCritique in ragas https://docs.ragas.io/en/stable/concepts/metrics/critique.html
+	// Designed to assess submissions based on predefined aspects such as harmlessness and correctness.
+	// Supported aspects: harmfulness, maliciousness, coherence, correctness, conciseness.
+	AspectCritique MetricsKind = "aspect_critique"
+)
+
+type Metric struct { // Metric describes one evaluation metric: its kind, its parameters and its tolerance threshold.
+	// Kind of this Metric.
+	Kind MetricsKind `json:"kind,omitempty"`
+
+	// Parameters passed to this Metric.
+	Parameters []Parameter `json:"parameters,omitempty"`
+
+	// ToleranceThreshbold on this Metric. NOTE(review): the Go field name is misspelled ("Threshbold"); the JSON key is spelled correctly.
+	// If the evaluation score is smaller than this tolerance threshold, we treat this RAG solution as `Bad`.
+	ToleranceThreshbold int `json:"tolerance_threshold,omitempty"`
+}
+
+// Parameter is a key-value pair passed to a metric.
+type Parameter struct {
+	Key   string `json:"key,omitempty"`   // Key is the parameter name.
+	Value string `json:"value,omitempty"` // Value is the parameter value, kept as a string.
+}
+
+// Report is the summarization of an evaluation. It is a placeholder and declares no fields yet.
+type Report struct {
+	// TODO: define the report fields.
+}
diff --git a/api/evaluation/v1alpha1/groupversion_info.go b/api/evaluation/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2024 KubeAGI.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the evaluation.arcadia.kubeagi.k8s.com.cn v1alpha1 API group
+// +kubebuilder:object:generate=true
+// +groupName=evaluation.arcadia.kubeagi.k8s.com.cn
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is the group version used to register these objects.
+	GroupVersion = schema.GroupVersion{Group: "evaluation.arcadia.kubeagi.k8s.com.cn", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/api/evaluation/v1alpha1/rag_types.go b/api/evaluation/v1alpha1/rag_types.go
@@ -0,0 +1,83 @@
+/*
+Copyright 2024 KubeAGI.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	basev1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1"
+)
+
+// Dataset stands for the files used to generate a ragas test dataset.
+type Dataset struct {
+	// Source defines the source which provides the QA files for the test dataset.
+	// Only `VersionedDataset` is allowed.
+	Source *basev1alpha1.TypedObjectReference `json:"source,omitempty"`
+	// Files retrieved from Source and used in this test dataset:
+	// - A file with tag `object_type: QA` will be used directly.
+	// - TODO: For a file without special tags, `QAGenerationChain` will be used to generate QAs (Not Supported Yet).
+	Files []string `json:"files,omitempty"`
+}
+
+// RAGSpec defines the desired state of RAG.
+type RAGSpec struct {
+	// Application(required) defines the target of this RAG evaluation.
+	Application *basev1alpha1.TypedObjectReference `json:"application"`
+
+	// Datasets defines the datasets which will be used to generate test datasets.
+	Datasets []Dataset `json:"datasets"`
+
+	// JudgeLLM(required) defines the judge, which is an LLM, used to evaluate the RAG application against the test dataset.
+	JudgeLLM *basev1alpha1.TypedObjectReference `json:"judge_llm"`
+
+	// Metrics that this RAG evaluation will compute.
+	Metrics []Metric `json:"metrics"`
+
+	// Report defines the evaluation report configurations.
+	Report Report `json:"report,omitempty"`
+}
+
+// RAGStatus defines the observed state of RAG. It currently declares no fields.
+type RAGStatus struct {
+	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
+	// Important: Run "make" to regenerate code after modifying this file
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// RAG is the Schema for the rags API.
+type RAG struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   RAGSpec   `json:"spec,omitempty"`   // Spec is the desired state of this RAG.
+	Status RAGStatus `json:"status,omitempty"` // Status is the observed state of this RAG.
+}
+
+//+kubebuilder:object:root=true
+
+// RAGList contains a list of RAG.
+type RAGList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []RAG `json:"items"` // Items holds the RAG objects in this list.
+}
+
+func init() { // Runs at package initialization.
+	SchemeBuilder.Register(&RAG{}, &RAGList{}) // Register RAG and RAGList with this package's SchemeBuilder.
+}