Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
yao.liu committed Jul 11, 2024
1 parent 479d54f commit b97f2ba
Show file tree
Hide file tree
Showing 8 changed files with 552 additions and 38 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ configs/eval_debug*.py
configs/viz_*.py
configs/**/*_bkup.py
opencompass/**/*_bkup.py
data
work_dirs
outputs
models/*
Expand Down
12 changes: 12 additions & 0 deletions configs/datasets/subjective/multiround/mtbench101_judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@
'mtbench101_test',
'mtbench101_ja',
'mtbench101_ja_test',
'mtbench101_ar',
'mtbench101_ar_test',
'mtbench101_id',
'mtbench101_id_test',
'mtbench101_fr',
'mtbench101_fr_test',
'mtbench101_de',
'mtbench101_de_test',
'mtbench101_it',
'mtbench101_it_test',
'mtbench101_es',
'mtbench101_es_test'
]
data_path = 'data/subjective/'

Expand Down
41 changes: 41 additions & 0 deletions data/subjective/translation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import jsonlines
from tqdm import tqdm
from singularity_nlp.util.translation import translate


def _translate_history(history, target, source):
    """Translate every value of every turn in *history*.

    Returns the list of translated turn dicts, or ``None`` as soon as
    ``translate`` yields a falsy result for any value, so that the caller
    can discard the whole item. No further values are translated after the
    first failure.
    """
    translated_turns = []
    for turn in history:
        translated_turn = {}
        for key, text in turn.items():
            rendered = translate(text, target=target, source=source, format='text')
            if not rendered:
                # One failed value invalidates the entire item.
                return None
            translated_turn[key] = rendered
        translated_turns.append(translated_turn)
    return translated_turns


def data_translate(input_file, output_file, line_cnt=None, target='ja', source='en'):
    """Translate the ``history`` field of each record in a JSON Lines file.

    Each record read from *input_file* is shallow-copied, its ``history``
    is replaced by the translated turns, and the copy is written to
    *output_file*. Records for which any value could not be translated
    (``translate`` returned a falsy result) are skipped and not written.

    Args:
        input_file: Path of the JSON Lines file to read.
        output_file: Path of the JSON Lines file to write.
        line_cnt: Passed to ``tqdm`` as ``total``; only affects the
            progress bar.
        target: Target language code forwarded to ``translate``.
        source: Source language code forwarded to ``translate``.

    Returns:
        The list of records that were written, in input order.
    """
    kept_records = []
    with jsonlines.open(input_file, 'r') as reader, jsonlines.open(output_file, 'w') as writer:
        for record in tqdm(reader, total=line_cnt):
            translated_history = _translate_history(record['history'], target, source)
            if translated_history is None:
                continue

            translated_record = record.copy()
            translated_record['history'] = translated_history
            kept_records.append(translated_record)
            writer.write(translated_record)
    return kept_records



if __name__ == '__main__':
    # Write one translated copy of the English benchmark file per target
    # language; the returned records are kept, keyed by language code.
    translated_by_lang = {}
    for lang_code in ('ja', 'ar', 'id', 'fr', 'de', 'it', 'es'):
        translated_by_lang[lang_code] = data_translate(
            'mtbench101.jsonl',
            f'mtbench101_{lang_code}.jsonl',
            line_cnt=1388,
            target=lang_code,
            source='en',
        )
Loading

0 comments on commit b97f2ba

Please sign in to comment.