Skip to content
This repository has been archived by the owner on Jul 23, 2024. It is now read-only.

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
Betswish committed Dec 31, 2023
1 parent 7a6181e commit a21fec1
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 58 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
accelerate 0.19.0
aiohttp 3.8.4
aiosignal 1.3.1
antlr4-python3-runtime 4.9.3
async-timeout 4.0.2
attrs 23.1.0
bitsandbytes 0.39.0
bitsandbytes-cuda113 0.30.1
cachetools 5.3.1
captum 0.6.0
certifi 2023.5.7
chardet 3.0.4
charset-normalizer 3.1.0
click 8.1.3
cmake 3.26.3
colorama 0.4.6
commonmark 0.9.1
contourpy 1.0.7
cycler 0.11.0
Cython 0.29.34
datasets 2.12.0
dill 0.3.6
filelock 3.12.0
fonttools 4.39.4
frozenlist 1.3.3
fsspec 2023.5.0
google-api-core 2.11.0
google-auth 2.19.1
google-cloud-core 2.3.2
google-cloud-translate 3.11.1
googleapis-common-protos 1.59.0
googletrans 4.0.0rc1
grpcio 1.54.2
grpcio-status 1.54.2
h11 0.9.0
h2 3.2.0
higher 0.2.1
hpack 3.0.0
hstspreload 2023.1.1
httpcore 0.9.1
httpx 0.13.3
huggingface-hub 0.14.1
hydra-core 1.3.2
hyperframe 5.2.0
idna 2.10
inquirerpy 0.3.4
inseq 0.5.0.dev0
Jinja2 3.1.2
joblib 1.2.0
jsonschema 4.17.3
kiwisolver 1.4.4
lang2vec 1.1.6
lang2vec 1.1.6
lang2vec 1.1.6
lit 16.0.5
MarkupSafe 2.1.2
matplotlib 3.5.3
mpmath 1.3.0
multidict 6.0.4
multiprocess 0.70.14
mypy-extensions 1.0.0
networkx 3.1
numpy 1.24.3
nvidia-cublas-cu11 11.10.3.66
nvidia-cuda-cupti-cu11 11.7.101
nvidia-cuda-nvrtc-cu11 11.7.99
nvidia-cuda-runtime-cu11 11.7.99
nvidia-cudnn-cu11 8.5.0.96
nvidia-cufft-cu11 10.9.0.58
nvidia-curand-cu11 10.2.10.91
nvidia-cusolver-cu11 11.4.0.1
nvidia-cusparse-cu11 11.7.4.91
nvidia-nccl-cu11 2.14.3
nvidia-nvtx-cu11 11.7.91
omegaconf 2.3.0
overrides 7.3.1
packaging 23.1
pandas 2.0.1
pastel 0.2.1
pfzy 0.3.4
Pillow 9.5.0
pip 23.0.1
poethepoet 0.13.1
prompt-toolkit 3.0.38
proto-plus 1.22.2
protobuf 3.20.3
psutil 5.9.5
pyarrow 12.0.0
pyasn1 0.5.0
pyasn1-modules 0.3.0
Pygments 2.15.1
pyparsing 3.0.9
pyproject-toml 0.0.10
pyre-extensions 0.0.23
pyrsistent 0.19.3
python-dateutil 2.8.2
pytz 2023.3
PyYAML 6.0
regex 2023.5.5
requests 2.30.0
responses 0.18.0
rfc3986 1.5.0
rich 10.16.2
rsa 4.9
sacremoses 0.0.53
safetensors 0.3.1
scikit-learn 1.2.2
scipy 1.10.1
sentencepiece 0.1.99
setuptools 66.0.0
six 1.16.0
sniffio 1.3.0
sympy 1.12
threadpoolctl 3.1.0
tokenizers 0.13.3
toml 0.10.2
tomli 2.0.1
torch 2.0.1
torchtyping 0.1.4
tqdm 4.65.0
transformers 4.30.0.dev0
triton 2.0.0
typeguard 2.13.3
typing_extensions 4.5.0
typing-inspect 0.8.0
tzdata 2023.3
unicodecsv 0.14.1
unimorph-inflect 0.0.1
unimorph-inflect 0.0.1
unimorph-inflect 0.0.1
urllib3 1.26.16
wcwidth 0.2.6
wheel 0.38.4
xformers 0.0.16
xxhash 3.2.0
yarl 1.9.2
79 changes: 21 additions & 58 deletions src/genbench/tasks/cross_lingual_consistency/task.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections import defaultdict
from typing import Any, Dict, List, Mapping, Optional

import datasets
Expand All @@ -8,6 +7,7 @@
from genbench import Task
from genbench.api import PreparationStrategy


class CrossLingualConsistencyTask(Task):
def _load_data_source(
self,
Expand Down Expand Up @@ -146,80 +146,43 @@ def get_prepared_datasets(
def evaluate_predictions(
self,
*,
predictions: List[Mapping[str, Any]] = None,
ranked=None,
origin=None,
gold: datasets.Dataset = None,
) -> Dict[str, float]:
def softmax(x):
    """Compute softmax values for each set of scores in x.

    Normalization runs along axis 0, so a 1-D input yields one
    distribution and a 2-D input is normalized column by column.

    Args:
        x: Array-like of scores.

    Returns:
        A numpy array with the same shape as ``x`` whose entries along
        axis 0 are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max raises on an empty array; there is nothing to normalize.
        return np.exp(x)
    # Softmax is invariant to subtracting a constant along the normalized
    # axis; shifting by the max keeps np.exp from overflowing to inf
    # (which would otherwise produce inf / inf = nan).
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

# Make sure that the predictions are in the same order as the gold dataset
assert len(predictions) == len(gold)

# Just to make sure the gold dataset is the same as the one we generated in `get_prepared_datasets`
assert "lang" in gold.features
assert "_genbnech_idx" in gold.features

# Also, make sure that predictions contain logprobs for each option
assert all(
"target_option_logprobs" in pred and len(pred["target_option_logprobs"]) == len(pred["target_options"])
for pred in predictions
)

# Group the prediction and instances such that we have:
# _genbnech_idx -> {
# "lang_id_1": { ...data_instance..., target_option_logprobs: ... }
# "lang_id_2": { ...data_instance..., target_option_logprobs: ... }
# },

grouped_examples = defaultdict(dict)
for pred, gold in zip(predictions, gold):
original_idx = gold["_genbnech_idx"]
lang = gold["lang"]
grouped_examples[original_idx][lang] = {
**gold,
**pred,
}
# Split candidates of the two langs
lang1_rankings = ranked[: len(ranked) / 2]
lang2_rankings = ranked[len(ranked) / 2 :]
cand_list1 = [[j for j in list(i.keys())] for i in origin[: len(origin) / 2]]
cand_list2 = [[j for j in list(i.keys())] for i in origin[len(origin) / 2 :]]

num_consistent = 0

CLC_score = 0
count = 0
langs = []
# Now, we compute the cross lingual consistency score
for idx, example in grouped_examples.items():
# Rerank the options based on the logprobs
for lang, data in example.items():
if len(langs) < 2:
langs.append(lang)

logprobs = data["target_option_logprobs"]
sorted_options = sorted(
zip(data["target_options"], logprobs),
key=lambda x: x[1],
reverse=False,
)
sorted_options, logprobs = zip(*sorted_options)
grouped_examples[idx][lang]["target_options"] = list(sorted_options)
grouped_examples[idx][lang]["target_option_logprobs"] = list(logprobs)

# Compute the cross lingual consistency score
ranking1 = grouped_examples[idx][langs[0]]["target_options"]
ranking2 = grouped_examples[idx][langs[1]]["target_options"]
for i in range(len(lang1_rankings)):
ranking1 = lang1_rankings[i]
ranking2 = lang2_rankings[i]

candidate1 = cand_list1[i]
candidate2 = cand_list2[i]

order = [len(ranking1) - i for i in range(len(ranking1))]
order = np.array(order)

weight = softmax(order)

for j in range(len(ranking1)):
set1 = {ranking1.index(i) for i in ranking1[: j + 1]}
set2 = {ranking2.index(i) for i in ranking2[: j + 1]}
set1 = {candidate1.index(i) for i in ranking1[: j + 1]}
set2 = {candidate2.index(i) for i in ranking2[: j + 1]}

cover = set1.intersection(set2)
CLC_score += weight[j] * (len(cover) / len(set1))

count += 1
CLC_score /= count
num_consistent += weight[j] * (len(cover) / len(set1))

# Compute the final score
CLC_score = num_consistent / len(lang1_rankings)
result = {
"cross_lingual_consistency": CLC_score,
}
Expand Down
Loading

0 comments on commit a21fec1

Please sign in to comment.