diff --git a/configs/datasets/truthfulqa/truthfulqa_gen_1e7d8d.py b/configs/datasets/truthfulqa/truthfulqa_gen_1e7d8d.py
index 269ab9486..74fa449c3 100644
--- a/configs/datasets/truthfulqa/truthfulqa_gen_1e7d8d.py
+++ b/configs/datasets/truthfulqa/truthfulqa_gen_1e7d8d.py
@@ -18,8 +18,11 @@
 # Metrics such as 'truth' and 'info' needs
 # OPENAI_API_KEY with finetuned models in it.
 # Please use your own finetuned openai model with keys and refers to
-# the source code for more details
-# Metrics such as 'bleurt', 'rouge', 'bleu' are free to test
+# the source code of `TruthfulQAEvaluator` for more details.
+#
+# If you cannot provide available models for 'truth' and 'info',
+# and want to perform basic metric eval, please set
+# `metrics=('bleurt', 'rouge', 'bleu')`
 
 # When key is set to "ENV", the key will be fetched from the environment
 # variable $OPENAI_API_KEY. Otherwise, set key in here directly.
diff --git a/configs/datasets/truthfulqa/truthfulqa_gen_5ddc62.py b/configs/datasets/truthfulqa/truthfulqa_gen_5ddc62.py
index 47227cc63..804675409 100644
--- a/configs/datasets/truthfulqa/truthfulqa_gen_5ddc62.py
+++ b/configs/datasets/truthfulqa/truthfulqa_gen_5ddc62.py
@@ -20,8 +20,11 @@
 # Metrics such as 'truth' and 'info' needs
 # OPENAI_API_KEY with finetuned models in it.
 # Please use your own finetuned openai model with keys and refers to
-# the source code for more details
-# Metrics such as 'bleurt', 'rouge', 'bleu' are free to test
+# the source code of `TruthfulQAEvaluator` for more details.
+#
+# If you cannot provide available models for 'truth' and 'info',
+# and want to perform basic metric eval, please set
+# `metrics=('bleurt', 'rouge', 'bleu')`
 
 # When key is set to "ENV", the key will be fetched from the environment
 # variable $OPENAI_API_KEY. Otherwise, set key in here directly.
diff --git a/opencompass/datasets/truthfulqa.py b/opencompass/datasets/truthfulqa.py
index a81cfc446..8ee46af3f 100644
--- a/opencompass/datasets/truthfulqa.py
+++ b/opencompass/datasets/truthfulqa.py
@@ -39,7 +39,9 @@ class TruthfulQAEvaluator(BaseEvaluator):
 
     Args:
         truth_model (str): Truth model name. See "notes" for details.
+            Defaults to ''.
         info_model (str): Informativeness model name. See "notes" for details.
+            Defaults to ''.
         metrics (tuple): Computing needed metrics for truthfulqa dataset.
             Supported metrics are `bleurt`, `rouge`, `bleu`, `truth`, `info`.
         key (str): Corresponding API key. If set to `ENV`, find it in
@@ -67,12 +69,11 @@ class TruthfulQAEvaluator(BaseEvaluator):
         'bleu': 'bleu',
     }
 
-    def __init__(
-            self,
-            truth_model: str,  # noqa
-            info_model: str,  # noqa
-            metrics=('bleurt', 'rouge', 'bleu', 'truth', 'info'),
-            key='ENV'):
+    def __init__(self,
+                 truth_model: str = '',
+                 info_model: str = '',
+                 metrics=('bleurt', 'rouge', 'bleu', 'truth', 'info'),
+                 key='ENV'):
         self.API_MODEL = {
             'truth': truth_model,
             'info': info_model,
@@ -85,6 +86,11 @@ def __init__(
             if metric in self.SCORE_KEY.keys():
                 self.metrics.append(metric)
             if metric in self.API_MODEL.keys():
+                assert self.API_MODEL.get(metric), \
+                    f'`{metric}_model` should be set to perform API eval. ' \
+                    'If you want to perform basic metric eval, ' \
+                    f'please refer to the docstring of {__file__} ' \
+                    'for more details.'
                 self.api_metrics.append(metric)
 
         if self.api_metrics:
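For reference, a minimal sketch of the two usage modes these changes describe, given the new defaults above. Only `TruthfulQAEvaluator` and its keyword arguments appear in this diff; the finetuned model names below are hypothetical placeholders, not real model IDs.

from opencompass.datasets.truthfulqa import TruthfulQAEvaluator

# Basic metric eval: drop 'truth' and 'info' from `metrics` and omit the
# judge models entirely. With the new defaults (truth_model='' and
# info_model=''), this call no longer raises a TypeError for missing
# positional arguments.
basic_eval = TruthfulQAEvaluator(metrics=('bleurt', 'rouge', 'bleu'))

# API eval: keeping 'truth'/'info' in `metrics` without a corresponding
# model now trips the new assert at construction time instead of failing
# later when requests are sent.
api_eval = TruthfulQAEvaluator(
    truth_model='ft:gpt-judge-placeholder',  # hypothetical model name
    info_model='ft:gpt-info-placeholder',  # hypothetical model name
    key='ENV',  # read OPENAI_API_KEY from the environment
)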