Skip to content

Commit

Permalink
Update cdme config and evaluator (#812)
Browse files Browse the repository at this point in the history
* update cdme config and evaluator

* fix cdme prompt

* move CDME trim post-processor as a separate evaluator

---------

Co-authored-by: 郭琦鹏 <[email protected]>
  • Loading branch information
QipengGuo and 郭琦鹏 committed Jan 19, 2024
1 parent f09a2ff commit e975a96
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 2 deletions.
34 changes: 32 additions & 2 deletions configs/datasets/cdme/cdme200k.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,21 @@ def generate_depth_percents(intervals, interval_type):
dataset_postprocessor=dict(type=cdme_dataset_postprocess),
pred_role='BOT')

# Dense sweep: every 1,000 tokens from 1,000 up to and including 200,000.
# NOTE(review): `context_lengths` is assigned again further down in this view,
# so this value appears to be superseded -- confirm which one is intended.
context_lengths = list(range(1000, 201000, 1000))
# Evaluation settings for the "trim" variant: the evaluator is built with
# use_trim=True; post-processors and the prediction role are listed explicitly.
cdme_trim_eval_cfg = {
    'evaluator': {
        'type': CDMEEvaluator,
        'use_trim': True,
    },
    'pred_postprocessor': {'type': cdme_postprocess},
    'dataset_postprocessor': {'type': cdme_dataset_postprocess},
    'pred_role': 'BOT',
}

# Context lengths to evaluate: every 16,000 tokens from 16,000 through
# 192,000, followed by 200,000 as the final point.
context_lengths = list(range(16000, 192001, 16000)) + [200000]

# Needle depth sampling: number of intervals and how they are spaced.
document_depth_percent_intervals = 20
document_depth_percent_interval_type = 'linear'

# Location of the source documents.
base_path = './data/CDME'
file_list = ['zh_finance.jsonl']

# One list per evaluation variant; both are filled by the loops below.
cdme_datasets = []
cdme_trim_datasets = []

for original_context_length in context_lengths:
for depth_percent in generate_depth_percents(
Expand All @@ -73,9 +81,31 @@ def generate_depth_percents(intervals, interval_type):
'language': 'Chinese',
'needle': '\n小明最喜欢的实习的地点就是上海人工智能实验室。\n',
'retrieval_question': '小明最喜欢的实习地点是哪里?请按照'
'“小明最喜欢的实习地点就是________。”的格式回答。',
'“小明最喜欢的实习地点就是________。”的格式回答。\n',
'reader_cfg': cdme_reader_cfg,
'infer_cfg': cdme_infer_cfg,
'eval_cfg': cdme_eval_cfg
}
cdme_datasets.append(dataset_dict)

# Dataset entry for the trimmed-scoring variant at the current length/depth;
# it is evaluated with `cdme_trim_eval_cfg`.
# NOTE(review): 'abbr' carries no trim-specific marker -- if the non-trim
# entry uses the same pattern, both lists share abbreviations; confirm that
# this is intended.
trim_dataset_dict = dict(
    abbr=f'CDME_Length{original_context_length}Depth{int(depth_percent)}',
    type=CDMEDataset,
    path=base_path,
    length=original_context_length,
    depth=int(depth_percent),
    tokenizer_model='gpt-4',
    file_list=file_list,
    num_repeats_per_file=10,
    length_buffer=200,
    guide=True,
    language='Chinese',
    needle='\n小明最喜欢的实习的地点就是上海人工智能实验室。\n',
    retrieval_question='小明最喜欢的实习地点是哪里?请按照'
    '“小明最喜欢的实习地点就是________。”的格式回答。\n',
    reader_cfg=cdme_reader_cfg,
    infer_cfg=cdme_infer_cfg,
    eval_cfg=cdme_trim_eval_cfg,
)
cdme_trim_datasets.append(trim_dataset_dict)
31 changes: 31 additions & 0 deletions opencompass/datasets/cdme/cdme.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,32 @@ def _generate_prompt(context, retrieval_question):

class CDMEEvaluator(BaseEvaluator):

def __init__(self, use_trim=False):
self.use_trim = use_trim

@staticmethod
def _trim_prediction(prediction, reference):
"""Trims the prediction string based on the length of the reference
string.
Args:
prediction (str): The prediction string.
reference (str): The reference string.
Returns:
str: The trimmed prediction string.
"""
l08 = int(0.8 * len(reference))
l12 = int(1.2 * len(reference))
trimmed_prediction = prediction[:l12]

if len(trimmed_prediction) > l08 and \
reference[-1] in trimmed_prediction[l08:]:
end_pos = l08 + trimmed_prediction[l08:].index(reference[-1]) + 1
trimmed_prediction = trimmed_prediction[:end_pos]

return trimmed_prediction

def levenshtein_distance(self, s1, s2):
if len(s1) < len(s2):
return self.levenshtein_distance(s2, s1)
Expand Down Expand Up @@ -159,6 +185,11 @@ def score(self, predictions, references):
for prediction, reference in zip(predictions, references):
prediction = re.sub(r'\s+', '', prediction)
reference = re.sub(r'\s+', '', reference)

if self.use_trim:
prediction = CDMEEvaluator._trim_prediction(
prediction, reference)

edit_distance = self.levenshtein_distance(prediction, reference)
max_len = max(len(prediction), len(reference))
score = 100 * (1 -
Expand Down

0 comments on commit e975a96

Please sign in to comment.