diff --git a/.github/workflows/eval_image_build.yml b/.github/workflows/eval_image_build.yml index d1eaaa328..8a11c8ccc 100644 --- a/.github/workflows/eval_image_build.yml +++ b/.github/workflows/eval_image_build.yml @@ -6,7 +6,6 @@ on: paths: - 'deploy/evaluation/Dockerfile' - 'pkg/**' - - 'pypi/ragas_once/**' workflow_dispatch: env: PYTHON_INDEX_URL: https://pypi.org/simple diff --git a/.github/workflows/eval_image_build_test.yml b/.github/workflows/eval_image_build_test.yml index a8d5c93b3..7ce2b56c1 100644 --- a/.github/workflows/eval_image_build_test.yml +++ b/.github/workflows/eval_image_build_test.yml @@ -6,7 +6,6 @@ on: paths: - 'deploy/evaluation/Dockerfile' - 'pkg/**' - - 'pypi/ragas_once/**' workflow_dispatch: env: PYTHON_INDEX_URL: https://pypi.org/simple diff --git a/deploy/evaluation/Dockerfile b/deploy/evaluation/Dockerfile index 09ea0c659..62e8ece0e 100644 --- a/deploy/evaluation/Dockerfile +++ b/deploy/evaluation/Dockerfile @@ -32,10 +32,17 @@ ARG PYTHON_INDEX_URL=https://pypi.mirrors.ustc.edu.cn/simple/ WORKDIR /usr/local/bin COPY --from=builder /go/src/arctl ./arctl -RUN python -m pip install ragas langchain==0.0.354 -i ${PYTHON_INDEX_URL} +RUN python -m pip install ragas langchain -i ${PYTHON_INDEX_URL} -# build ragas-once by source code -COPY ./pypi/ragas_once ./ragas_once -WORKDIR /usr/local/bin/ragas_once -RUN python setup.py bdist_wheel -d /usr/local/bin/ragas_once/dist -RUN pip install dist/ragas_once-0.0.1-py3-none-any.whl \ No newline at end of file +# Install core-library +RUN apt-get install -y git make gcc +RUN git clone https://github.com/kubeagi/core-library.git +WORKDIR /usr/local/bin/core-library +RUN make install-eval + +# DEPRECATED: moved to core library +# # build ragas-once by source code +# COPY ./pypi/ragas_once ./ragas_once +# WORKDIR /usr/local/bin/ragas_once +# RUN python setup.py bdist_wheel -d /usr/local/bin/ragas_once/dist +# RUN pip install dist/ragas_once-0.0.1-py3-none-any.whl \ No newline at end of file diff --git a/pkg/evaluation/jobs.go b/pkg/evaluation/jobs.go index c072eed77..b51832ed7 100644 --- a/pkg/evaluation/jobs.go +++ b/pkg/evaluation/jobs.go @@ -40,6 +40,7 @@ const ( defaultPVCMountPath = "/data/evaluations" defaultTestRagFile = "ragas.csv" defaultMCImage = "kubeagi/minio-mc:RELEASE.2023-01-28T20-29-38Z" + defaultEvalImage = "kubeagi/arcadia-eval:v0.2.0" // The clusterrolebinding required for the rag evaluation process is ragas-eval-clusterrolebinding by default, // and can be changed via environment variable RAG_EVAL_CLUSTERROLEBINDING. @@ -86,7 +87,7 @@ func DownloadJob(instance *evav1alpha1.RAG) (*batchv1.Job, error) { Containers: []v1.Container{ { Name: "download-dataset-files", - Image: "kubeagi/arcadia-eval:v0.1.0", + Image: defaultEvalImage, Command: []string{ "arctl", }, @@ -146,7 +147,7 @@ func GenTestDataJob(instance *evav1alpha1.RAG) (*batchv1.Job, error) { Containers: []v1.Container{ { Name: "gen-test-files", - Image: "kubeagi/arcadia-eval:v0.1.0", + Image: defaultEvalImage, Command: []string{ "arctl", }, @@ -253,20 +254,21 @@ func JudgeJobGenerator(ctx context.Context, c client.Client) func(*evav1alpha1.R Containers: []v1.Container{ { Name: "judge-llm", - Image: "kubeagi/arcadia-eval:v0.1.0", + Image: defaultEvalImage, WorkingDir: defaultPVCMountPath, Command: []string{ - "python3", + "kubeagi-cli", }, Args: []string{ - "-m", - "ragas_once.cli", + "evaluate", fmt.Sprintf("--apibase=%s", apiBase), - fmt.Sprintf("--llm=%s", model), + fmt.Sprintf("--embedding-apibase=%s", apiBase), + fmt.Sprintf("--llm-model=%s", model), fmt.Sprintf("--apikey=%s", apiKey), + fmt.Sprintf("--embedding-apikey=%s", apiKey), fmt.Sprintf("--dataset=%s", filepath.Join(defaultPVCMountPath, defaultTestRagFile)), fmt.Sprintf("--metrics=%s", strings.Join(metrics, ",")), - fmt.Sprintf("--embedding=%s", embedderModelList[0]), + fmt.Sprintf("--embedding-model=%s", embedderModelList[0]), }, VolumeMounts: []v1.VolumeMount{ { diff --git a/pypi/ragas_once/LICENSE b/pypi/ragas_once/LICENSE deleted file mode 100644 index 77a54b128..000000000 --- a/pypi/ragas_once/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [2023 KubeAGI] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/pypi/ragas_once/README.md b/pypi/ragas_once/README.md deleted file mode 100644 index d327649d8..000000000 --- a/pypi/ragas_once/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# Ragas CLI - -A one-step Ragas cli tool to evaluate QCAG testsets generated by RAG apps. (Q = Question, C = Contexts, A = Answer, G = Ground_truth) - -## Install with pip - -```bash -pip install ragas langchain==0.0.354 -``` - -then: - -```bash -pip install ragas_once -``` - -## Arguments - -- `--apibase`: Specifies the base URL for the API. - - Default value is "https://api.openai.com/v1". -- `--apikey`: Specifies the API key to authenticate requests. - - Not required if using psuedo-openai API server, e.g. vLLM, Fastchat, etc. -- `--llm`: Specifies the lllm model to use for evaluation. - - Default value is "gpt-3.5-turbo". Langchain compatible. -- `--embedding`: Specifies the embeddings model to use for evaluation. - - Will use OpenAI embeddings if not set. -- `--metrics`: Specifies the metrics to use for evaluation. - - Will use Ragas default metrics if not set. - - Default metrics: `answer_relevancy,context_precision,context_recall,context_relevancy,faithfulness` - - Other metrics: `"answer_similarity", "answer_correctness"` -- `--dataset`: Specifies the path to the dataset for evaluation. - - Dataset format must meet RAGAS requirements. - - Will use fiqa dataset as demo if not set. - -## Usage - -### Fiqa dataset demo: - -```bash -ro --apikey "YOUR_OPENAI_API_KEY" -``` - -### Evaluate with another OpenAI Compatible api services - -Say you have another openai-compatible api service like [fastchat](https://github.com/lm-sys/FastChat): - -- `apibase`: "http://fastchat-api.172.22.96.167.nip.io/v1" -- `apikey`: "fake" -- `llm`: "9048a24f-b650-4197-be1c-77352bd67ead" (The unique id of a lllm model) -- `embedding`: "e1c3eed4-4ae1-4afd-98e3-a1bc57136ff7" (The unique id of a embedding model) - -```bash -ro --apibase "http://fastchat-api.172.22.96.167.nip.io/v1" --apikey "fake" \ - --llm "9048a24f-b650-4197-be1c-77352bd67ead" --embedding "e1c3eed4-4ae1-4afd-98e3-a1bc57136ff7" --dataset "path/to/dataset.csv" -``` - -### Prepare Dataset - -See [**Ragas documentation**](https://docs.ragas.io/en/stable/howtos/applications/data_preparation.html) \ No newline at end of file diff --git a/pypi/ragas_once/pyproject.toml b/pypi/ragas_once/pyproject.toml deleted file mode 100644 index 40834399d..000000000 --- a/pypi/ragas_once/pyproject.toml +++ /dev/null @@ -1,30 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61.0", - "setuptools-scm", - "ragas==0.0.22", - "langchain==0.0.354" -] -build-backend = "setuptools.build_meta" - -[project] -name = "ragas_once" -version = "0.0.1" -authors = [ - { name = "Kielo", email = "lanture1064@gmail.com" }, -] -description = "A one-step Ragas cli tool to evaluate RAG apps" -readme = "README.md" -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", -] -dependencies = [ - "langchain==0.0.354", - "ragas==0.0.22", -] - -[project.scripts] -ro = "ragas_once.cli:main" \ No newline at end of file diff --git a/pypi/ragas_once/setup.py b/pypi/ragas_once/setup.py deleted file mode 100644 index 9ec468f89..000000000 --- a/pypi/ragas_once/setup.py +++ /dev/null @@ -1,5 +0,0 @@ -# make a setup.py for ragacli package - -from setuptools import setup - -setup(name="ragas_once", version="0.0.1") diff --git a/pypi/ragas_once/src/ragas_once/__init__.py b/pypi/ragas_once/src/ragas_once/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/pypi/ragas_once/src/ragas_once/cli.py b/pypi/ragas_once/src/ragas_once/cli.py deleted file mode 100644 index a8e12a2b3..000000000 --- a/pypi/ragas_once/src/ragas_once/cli.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2024 KubeAGI. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse - -from ragas_once.eval import RagasEval - - -def main(): - """ - This function is the entry point for the program. It parses command line arguments and sets up the necessary variables for evaluation. - - Parameters: - None - - Returns: - The result of evaluating the test set using the specified metrics. - """ - parser = argparse.ArgumentParser(description="RAGAS CLI") - parser.add_argument( - "--apibase", - type=str, - default="https://api.openai.com/v1", - help="Specifies the base URL for the API. Defaults to OpenAI.", - ) - parser.add_argument( - "--apikey", type=str, help="Specifies the API key to authenticate requests." - ) - parser.add_argument( - "--llm", - type=str, - default="gpt-3.5-turbo", - help="Specifies the model to use for evaluation. Defaults to gpt-3.5-turbo.", - ) - parser.add_argument( - "--embedding", - type=str, - default="text-embedding-ada-002", - help="Specifies embeddings model (or its path) to use for evaluation. Will use OpenAI embeddings if not set.", - ) - parser.add_argument( - "--metrics", - type=str, - default="answer_relevancy,context_precision,context_recall,context_relevancy,faithfulness", - help="Specifies the metrics to use for evaluation. Comma-separated values.", - ) - parser.add_argument( - "--dataset", - type=str, - help="Specifies the path to the dataset for evaluation. Will use fiqa dataset if not set.", - ) - - args = parser.parse_args() - - # Initialize ragas_once with provided arguments - once = RagasEval( - api_base=args.apibase, - api_key=args.apikey, - llm_model=args.llm, - embedding_model=args.embedding, - ) - - # Prepare the dataset - dataset = once.prepare_dataset(args.dataset) - - if dataset is None: - raise ValueError("No dataset provided") - - # Get the metrics to evaluate - metrics = once.get_ragas_metrics(args.metrics.split(",")) - - # Run the evaluation - once.evaluate(dataset=dataset, metrics=metrics) - - -if __name__ == "__main__": - main() diff --git a/pypi/ragas_once/src/ragas_once/embeddings/openai.py b/pypi/ragas_once/src/ragas_once/embeddings/openai.py deleted file mode 100644 index ac3a16197..000000000 --- a/pypi/ragas_once/src/ragas_once/embeddings/openai.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2024 KubeAGI. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from langchain.embeddings import OpenAIEmbeddings as BaseOpenAIEmbeddings -from ragas.embeddings import RagasEmbeddings -from ragas.exceptions import OpenAIKeyNotFound -from ragas.utils import NO_KEY - - -class OpenAIEmbeddings(BaseOpenAIEmbeddings, RagasEmbeddings): - api_key: str = NO_KEY - - def __init__( - self, api_key: str = NO_KEY, api_base: str = NO_KEY, model_name: str = NO_KEY - ): - # api key - key_from_env = os.getenv("OPENAI_API_KEY", NO_KEY) - if key_from_env != NO_KEY: - openai_api_key = key_from_env - else: - openai_api_key = api_key - super(BaseOpenAIEmbeddings, self).__init__( - openai_api_key=openai_api_key, openai_api_base=api_base, model=model_name - ) - self.api_key = openai_api_key - - def validate_api_key(self): - if self.openai_api_key == NO_KEY: - os_env_key = os.getenv("OPENAI_API_KEY", NO_KEY) - if os_env_key != NO_KEY: - self.api_key = os_env_key - else: - raise OpenAIKeyNotFound diff --git a/pypi/ragas_once/src/ragas_once/eval.py b/pypi/ragas_once/src/ragas_once/eval.py deleted file mode 100644 index 9a373bd78..000000000 --- a/pypi/ragas_once/src/ragas_once/eval.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright 2024 KubeAGI. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Union - -import pandas as pd -from datasets import Dataset -from langchain.chat_models import ChatOpenAI -from ragas import evaluate -from ragas.embeddings import RagasEmbeddings -from ragas.llms import LangchainLLM, RagasLLM -from ragas.metrics import (AnswerCorrectness, AnswerRelevancy, - AnswerSimilarity, ContextPrecision, ContextRecall, - ContextRelevancy, Faithfulness) -from ragas.metrics.base import Metric -from ragas.utils import NO_KEY -from ragas_once.embeddings.openai import OpenAIEmbeddings - - -class RagasEval: - """ - The RagasOnce class is a tool for evaluating natural language models (NLMs) using various metrics for question-answering tasks. It utilizes OpenAI's language model (LLM) and text embedding models for evaluation. - - The class features include: - - Initialization: The class constructor allows customization of the API base URL, API key, LLM model, and embedding model. - Dataset Preparation: The prepare_dataset method prepares the dataset for evaluation, splitting specific columns and converting them into lists. - Metrics Configuration: The get_ragas_metrics method sets the metrics for evaluation, creating instances of various metric classes based on the requested metrics. - Evaluation: The evaluate method performs the evaluation of the dataset using specified metrics, calculating summary scores and saving the results to CSV files. - """ - - # use openai llm&embedding by default - api_base: str = "https://api.openai.com/v1" - api_key: str = "fake" - llm_model: str = "gpt-3.5-turbo" - embedding_model: str = "text-embedding-ada-002" - - llm: RagasLLM - embeddings: RagasEmbeddings - - def __init__( - self, - api_base: str = NO_KEY, - api_key: str = NO_KEY, - llm_model: str = NO_KEY, - embedding_model: str = NO_KEY, - ): - # Initialize attributes based on provided arguments or default values - self.api_base = api_base if api_base != NO_KEY else self.api_base - self.api_key = api_key if api_key != NO_KEY else self.api_key - self.llm_model = llm_model if llm_model != NO_KEY else self.llm_model - self.embedding_model = ( - embedding_model if embedding_model != NO_KEY else self.embedding_model - ) - - # Initialize judge llm - self.llm = LangchainLLM( - llm=ChatOpenAI( - model_name=self.llm_model, - openai_api_key=self.api_key, - openai_api_base=self.api_base, - ) - ) - - # Initialize judge embedding - self.embeddings = OpenAIEmbeddings( - api_key=self.api_key, - api_base=self.api_base, - model_name=self.embedding_model, - ) - - def prepare_dataset(self, dataset: str = NO_KEY) -> Dataset: - """ - Prepares the dataset for evaluation. - - Parameters: - dataset (str): The path to the dataset file. - - Returns: - Dataset: The prepared dataset. - - Raises: - ValueError: If no dataset is provided. - """ - if dataset == NO_KEY: - raise ValueError("No dataset provided") - try: - data = pd.read_csv(dataset) - except Exception as e: - print("An error occurred during prepare dataset:", str(e)) - return - - columns_to_split = ["ground_truths", "contexts"] - for column in columns_to_split: - if column in data.columns: - data[column] = ( - data[column].astype(str).apply(lambda x: x.split(";")).to_list() - ) - - return Dataset.from_pandas(data) - - def get_ragas_metrics( - self, metrics: list[str], batch_size: Union[int, None] = 1 - ) -> list[Metric]: - """ - Sets the metrics for evaluation. - - Parameters: - metrics (list[str]): A list of metric names to be set. - - Returns: - list[Metric]: A list of Metric objects representing the set metrics. - """ - context_precision = ContextPrecision(llm=self.llm, batch_size=batch_size) - context_recall = ContextRecall(llm=self.llm, batch_size=batch_size) - context_relevancy = ContextRelevancy(llm=self.llm, batch_size=batch_size) - - answer_relevancy = AnswerRelevancy( - llm=self.llm, embeddings=self.embeddings, batch_size=batch_size - ) - answer_similarity = AnswerSimilarity( - llm=self.llm, embeddings=self.embeddings, batch_size=batch_size - ) - answer_correctness = AnswerCorrectness( - llm=self.llm, answer_similarity=answer_similarity, batch_size=batch_size - ) - faithfulness = Faithfulness(llm=self.llm, batch_size=batch_size) - - ms = [] - for m in metrics: - if m == "context_precision": - ms.append(context_precision) - elif m == "context_recall": - ms.append(context_recall) - elif m == "context_relevancy": - ms.append(context_relevancy) - elif m == "answer_relevancy": - ms.append(answer_relevancy) - elif m == "answer_correctness": - ms.append(answer_correctness) - elif m == "answer_similarity": - ms.append(answer_similarity) - elif m == "faithfulness": - ms.append(faithfulness) - return ms - - def evaluate( - self, - dataset: Dataset, - metrics: list[Metric] | None = None, - column_map: dict[str, str] = {}, - ): - """ - Evaluates the dataset using the specified metrics and saves the evaluation results. - - Parameters: - dataset (Dataset): The dataset to be evaluated. - metrics (list[Metric] | None): The list of metrics to evaluate the dataset. Defaults to None. - column_map (dict[str, str]): A mapping of column names in the dataset to the corresponding column names - expected by the evaluation function. Defaults to an empty dictionary. - - Returns: - None - """ - try: - result = evaluate(dataset, metrics, column_map) - except Exception as e: - print("An error occurred during evaluation:", str(e)) - return - # count total score and avearge - summary = result.scores.to_pandas().mean() - summary["total_score"] = summary.mean() - summary.to_csv("summary.csv") - result.to_pandas().to_csv("result.csv")