Skip to content

Commit

Permalink
Merge pull request #505 from bjwswang/evaluation
Browse files Browse the repository at this point in the history
feat: define CRD RAG under group evaluations
  • Loading branch information
bjwswang authored Jan 9, 2024
2 parents a0639e8 + 03a351a commit 307e26d
Show file tree
Hide file tree
Showing 35 changed files with 993 additions and 18 deletions.
11 changes: 10 additions & 1 deletion PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ componentConfig: true
domain: kubeagi.k8s.com.cn
layout:
- go.kubebuilder.io/v3
multigroup: true
plugins:
manifests.sdk.operatorframework.io/v2: {}
scorecard.sdk.operatorframework.io/v2: {}
multigroup: true
projectName: arcadia
repo: github.com/kubeagi/arcadia
resources:
Expand Down Expand Up @@ -142,4 +142,13 @@ resources:
kind: Prompt
path: github.com/kubeagi/arcadia/api/app-node/prompt/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: arcadia.kubeagi.k8s.com.cn
group: evaluation
kind: RAG
path: github.com/kubeagi/arcadia/api/evaluation/v1alpha1
version: v1alpha1
version: "3"
79 changes: 79 additions & 0 deletions api/evaluation/v1alpha1/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
Copyright 2024 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

// MetricsKind names an evaluation metric. Each kind declared below refers to
// a metric documented by ragas.
type MetricsKind string

// Metric kinds that can be requested for an evaluation.
const (
	// AnswerRelevancy scores how relevant the answer is to the given question.
	// See https://docs.ragas.io/en/stable/concepts/metrics/answer_relevance.html
	AnswerRelevancy MetricsKind = "answer_relevancy"

	// AnswerSimilarity scores the semantic similarity between the ground truth
	// and the generated answer.
	// See https://docs.ragas.io/en/stable/concepts/metrics/semantic_similarity.html
	AnswerSimilarity MetricsKind = "answer_similarity"

	// AnswerCorrectness measures the correctness of the answer compared to the
	// ground truth as a weighted combination of
	// - factuality
	// - semantic similarity
	// See https://docs.ragas.io/en/stable/concepts/metrics/answer_correctness.html
	AnswerCorrectness MetricsKind = "answer_correctness"

	// Faithfulness scores the factual consistency of the generated answer
	// against the given context.
	// See https://docs.ragas.io/en/stable/concepts/metrics/faithfulness.html
	Faithfulness MetricsKind = "faithfulness"

	// ContextPrecision evaluates whether all of the relevant items selected by
	// the model are ranked higher or not (average precision).
	// See https://docs.ragas.io/en/stable/concepts/metrics/context_precision.html
	ContextPrecision MetricsKind = "context_precision"

	// ContextRelevancy gauges the relevancy of the retrieved context.
	// See https://docs.ragas.io/en/stable/concepts/metrics/context_relevancy.html
	ContextRelevancy MetricsKind = "context_relevancy"

	// ContextRecall estimates context recall by estimating TP and FN from the
	// annotated answer and the retrieved context.
	// See https://docs.ragas.io/en/stable/concepts/metrics/context_recall.html
	ContextRecall MetricsKind = "context_recall"

	// AspectCritique assesses submissions against predefined aspects such as
	// harmlessness and correctness.
	// Supported aspects: harmfulness, maliciousness, coherence, correctness,
	// conciseness.
	// See https://docs.ragas.io/en/stable/concepts/metrics/critique.html
	AspectCritique MetricsKind = "aspect_critique"
)

// Metric describes one metric to evaluate, together with its parameters and
// the score threshold it is judged against.
type Metric struct {
	// Kind selects which metric this entry refers to.
	Kind MetricsKind `json:"kind,omitempty"`

	// Parameters lists the parameters of this metric.
	Parameters []Parameter `json:"parameters,omitempty"`

	// ToleranceThreshbold is the tolerance threshold on this metric
	// (serialized as `tolerance_threshold`).
	// If the evaluation score is smaller than this threshold, the RAG solution
	// is treated as `Bad`.
	ToleranceThreshbold int `json:"tolerance_threshold,omitempty"`
}

// Parameter is a single key-value pair passed to a metric.
type Parameter struct {
	// Key is the name of the parameter.
	Key string `json:"key,omitempty"`
	// Value is the value of the parameter, kept as a string.
	Value string `json:"value,omitempty"`
}

// Report summarizes the outcome of an evaluation.
type Report struct {
	// TODO: no report fields are defined yet.
}
36 changes: 36 additions & 0 deletions api/evaluation/v1alpha1/groupversion_info.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Copyright 2024 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package v1alpha1 contains API Schema definitions for the evaluation.arcadia.kubeagi.k8s.com.cn v1alpha1 API group
// +kubebuilder:object:generate=true
// +groupName=evaluation.arcadia.kubeagi.k8s.com.cn
package v1alpha1

import (
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)

// GroupVersion is the group and version under which the objects of this
// package are registered.
var GroupVersion = schema.GroupVersion{
	Group:   "evaluation.arcadia.kubeagi.k8s.com.cn",
	Version: "v1alpha1",
}

// SchemeBuilder is used to add the Go types of this package to the
// GroupVersionKind scheme.
var SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
var AddToScheme = SchemeBuilder.AddToScheme
83 changes: 83 additions & 0 deletions api/evaluation/v1alpha1/rag_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
Copyright 2024 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

basev1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1"
)

// Dataset describes the files used to generate a ragas test dataset.
type Dataset struct {
	// Source references the object that provides the QA files for the test
	// dataset. Only `VersionedDataset` is allowed.
	Source *basev1alpha1.TypedObjectReference `json:"source,omitempty"`

	// Files lists the files retrieved from Source and used in this test
	// dataset.
	// - A file tagged `object_type: QA` is used directly.
	// - TODO: a file without special tags will use `QAGenerationChain` to
	//   generate QAs (not supported yet).
	Files []string `json:"files,omitempty"`
}

// RAGSpec is the desired state of a RAG evaluation.
type RAGSpec struct {
	// Application (required) references the target of this RAG evaluation.
	Application *basev1alpha1.TypedObjectReference `json:"application"`

	// Datasets lists the datasets used to generate the test datasets.
	Datasets []Dataset `json:"datasets"`

	// JudgeLLM (required) references the LLM acting as judge, which evaluates
	// the RAG application against the test dataset.
	JudgeLLM *basev1alpha1.TypedObjectReference `json:"judge_llm"`

	// Metrics lists the metrics this RAG evaluation will compute.
	Metrics []Metric `json:"metrics"`

	// Report holds the configuration of the evaluation report.
	Report Report `json:"report,omitempty"`
}

// RAGStatus is the observed state of a RAG evaluation.
type RAGStatus struct {
	// No status fields are defined yet; add observed-state fields here.
	// Important: run "make" to regenerate code after modifying this file.
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// RAG is the Schema for the rags API. It pairs the desired state of an
// evaluation (Spec) with its observed state (Status).
type RAG struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state of this RAG.
	Spec RAGSpec `json:"spec,omitempty"`
	// Status is the observed state of this RAG.
	Status RAGStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// RAGList is a list of RAG objects.
type RAGList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`

	// Items holds the RAG objects in this list.
	Items []RAG `json:"items"`
}

// init registers the RAG and RAGList types with this package's SchemeBuilder.
func init() {
	SchemeBuilder.Register(new(RAG), new(RAGList))
}
Loading

0 comments on commit 307e26d

Please sign in to comment.