-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #505 from bjwswang/evaluation
feat: define CRD RAG under group evaluations
- Loading branch information
Showing
35 changed files
with
993 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
Copyright 2024 KubeAGI. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
// MetricsKind is the string identifier of an evaluation metric.
// The supported identifiers are the constants declared below; each one
// corresponds to a metric documented by ragas.
type MetricsKind string

const (
	// AnswerRelevancy scores the relevancy of the answer according to the given question.
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/answer_relevance.html
	AnswerRelevancy MetricsKind = "answer_relevancy"

	// AnswerSimilarity scores the semantic similarity of the ground truth with the generated answer.
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/semantic_similarity.html
	AnswerSimilarity MetricsKind = "answer_similarity"

	// AnswerCorrectness measures answer correctness compared to the ground truth
	// as a weighted combination of
	// - factuality
	// - semantic similarity
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/answer_correctness.html
	AnswerCorrectness MetricsKind = "answer_correctness"

	// Faithfulness scores the factual consistency of the generated answer against the given context.
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/faithfulness.html
	Faithfulness MetricsKind = "faithfulness"

	// ContextPrecision is an average-precision metric that evaluates whether all of the
	// relevant items selected by the model are ranked higher or not.
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/context_precision.html
	ContextPrecision MetricsKind = "context_precision"

	// ContextRelevancy gauges the relevancy of the retrieved context.
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/context_relevancy.html
	ContextRelevancy MetricsKind = "context_relevancy"

	// ContextRecall estimates context recall by estimating TP and FN using the
	// annotated answer and the retrieved context.
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/context_recall.html
	ContextRecall MetricsKind = "context_recall"

	// AspectCritique is designed to assess submissions based on predefined aspects
	// such as harmlessness and correctness.
	// SUPPORTED_ASPECTS = [ harmfulness, maliciousness, coherence, correctness, conciseness, ]
	// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/critique.html
	AspectCritique MetricsKind = "aspect_critique"
)
|
||
type Metric struct { | ||
// Kind of this Metric | ||
Kind MetricsKind `json:"kind,omitempty"` | ||
|
||
// Parameters in this Metrics | ||
Parameters []Parameter `json:"parameters,omitempty"` | ||
|
||
// ToleranceThreshbold on this Metric | ||
// If the evaluation score is smaller than this tolerance threshold,we treat this RAG solution as `Bad` | ||
ToleranceThreshbold int `json:"tolerance_threshold,omitempty"` | ||
} | ||
|
||
// Parameter is a key-value pair passed to a metric.
type Parameter struct {
	// Key is the name of the parameter.
	Key string `json:"key,omitempty"`
	// Value is the value of the parameter, expressed as a string.
	Value string `json:"value,omitempty"`
}
|
||
// Report is the summarization of evaluation.
// It is currently a placeholder with no fields.
type Report struct {
	// TODO
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
Copyright 2024 KubeAGI. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// Package v1alpha1 contains API Schema definitions for the evaluation.arcadia v1alpha1 API group | ||
// +kubebuilder:object:generate=true | ||
// +groupName=evaluation.arcadia.kubeagi.k8s.com.cn | ||
package v1alpha1 | ||
|
||
import ( | ||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
"sigs.k8s.io/controller-runtime/pkg/scheme" | ||
) | ||
|
||
var ( | ||
// GroupVersion is group version used to register these objects | ||
GroupVersion = schema.GroupVersion{Group: "evaluation.arcadia.kubeagi.k8s.com.cn", Version: "v1alpha1"} | ||
|
||
// SchemeBuilder is used to add go types to the GroupVersionKind scheme | ||
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} | ||
|
||
// AddToScheme adds the types in this group-version to the given scheme. | ||
AddToScheme = SchemeBuilder.AddToScheme | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
Copyright 2024 KubeAGI. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
import ( | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
|
||
basev1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1" | ||
) | ||
|
||
// Dataset stands for the files used to generate ragas test dataset | ||
type Dataset struct { | ||
// From defines the source which provides this QA Files for test dataset | ||
// Only `VersionedDataset` allowed | ||
Source *basev1alpha1.TypedObjectReference `json:"source,omitempty"` | ||
// Files retrieved from Source and used in this testdataset | ||
// - For file with tag `object_type: QA`, will be used directly | ||
// - TODO: For file without special tags, will use `QAGenerationChain` to generate QAs (Not Supported Yet) | ||
Files []string `json:"files,omitempty"` | ||
} | ||
|
||
// RAGSpec defines the desired state of RAG | ||
type RAGSpec struct { | ||
// Application(required) defines the target of this RAG evaluation | ||
Application *basev1alpha1.TypedObjectReference `json:"application"` | ||
|
||
// Datasets defines the dataset which will be used to generate test datasets | ||
Datasets []Dataset `json:"datasets"` | ||
|
||
// JudgeLLM(required) defines the judge which is a LLM to evaluate RAG application against test dataset | ||
JudgeLLM *basev1alpha1.TypedObjectReference `json:"judge_llm"` | ||
|
||
// Metrics that this rag evaluation will do | ||
Metrics []Metric `json:"metrics"` | ||
|
||
// Report defines the evaluation report configurations | ||
Report Report `json:"report,omitempty"` | ||
} | ||
|
||
// RAGStatus defines the observed state of RAG.
// No status fields are defined yet.
type RAGStatus struct {
	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
	// Important: Run "make" to regenerate code after modifying this file
}
|
||
//+kubebuilder:object:root=true | ||
//+kubebuilder:subresource:status | ||
|
||
// RAG is the Schema for the rags API | ||
type RAG struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ObjectMeta `json:"metadata,omitempty"` | ||
|
||
Spec RAGSpec `json:"spec,omitempty"` | ||
Status RAGStatus `json:"status,omitempty"` | ||
} | ||
|
||
//+kubebuilder:object:root=true | ||
|
||
// RAGList contains a list of RAG | ||
type RAGList struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ListMeta `json:"metadata,omitempty"` | ||
Items []RAG `json:"items"` | ||
} | ||
|
||
func init() { | ||
SchemeBuilder.Register(&RAG{}, &RAGList{}) | ||
} |
Oops, something went wrong.