Skip to content
This repository has been archived by the owner on Jul 23, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' into cross_lingual_consistency
Browse files Browse the repository at this point in the history
  • Loading branch information
kazemnejad authored Dec 31, 2023
2 parents a21fec1 + ff08e9f commit 3b436b4
Show file tree
Hide file tree
Showing 148 changed files with 5,879 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/task_submission_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
- name: Parse the Task ID from PR's title
id: pr_task_id
run: |
task_id=$(echo '${{ github.event.pull_request.title }}' | sed -n -e 's/^\[Task Submission\][[:alnum:][:space:]()]\+[[:space:]]*(`\([^`]*\)`)[[:space:]]*.*/\1/p')
task_id=$(echo '${{ github.event.pull_request.title }}' | sed -n -e 's/^\[Task Submission\][[:alnum:][:space:]()_-]\+[[:space:]]*(`\([^`]*\)`)[[:space:]]*.*/\1/p')
echo "Task ID: $task_id"
echo "task_id=$task_id" >> $GITHUB_OUTPUT
shell: bash
Expand Down Expand Up @@ -111,4 +111,4 @@ jobs:
- name: Test Task
run: |
genbench-cli test-task -i ${{ steps.pr_task_id.outputs.task_id }} --tests-dir ./tests
genbench-cli test-task -i ${{ steps.pr_task_id.outputs.task_id }} --tests-dir ./tests
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# Numpy is needed for some of HF's metrics
"numpy",
"typing_extensions>=4.6",
"statsmodels>=0.14",
]


Expand Down
5 changes: 5 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from genbench import TaskDict


class EuroparlDbcaSplits(TaskDict):
    """Task dictionary for the Europarl DBCA splits; adds nothing on top of ``TaskDict``."""
116 changes: 116 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/_base_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from collections import OrderedDict
from typing import Any, List, Mapping

import evaluate
import numpy as np
from datasets import Dataset

from genbench import Task
from genbench.api import EvaluationResult, TaskType
from genbench.utils.logging import get_logger


logger = get_logger(__name__)


class BaseDbcaTask(Task):
"""This task evaluates how well an NMT model generalises to a shifted distribution of
dependency relations. In practice, this means that the test set includes novel
(<head lemma>, <deprel>, <dependant lemma>) tuples (=compounds) that were not seen in
the training set, while having similar relative frequencies of the lemmas and dependency
relation tags (= elements of the compound tuples = atoms).
"""

def evaluate_predictions(
    self,
    *,
    predictions: List[Mapping[str, Any]] = None,
    gold: Dataset = None,
) -> EvaluationResult:
    """Score predictions against the gold targets with every configured metric.

    Each entry of ``self.config.evaluation_metrics`` is loaded through
    ``evaluate.load`` and computed on the ``"target"`` fields of
    ``predictions`` and ``gold``. Only the ``"score"`` entry of a metric's
    output is kept, stored under the key ``hf_<metric id>__score``.

    For multiple-choice tasks, string targets are replaced by their index in
    the example's ``"target_options"`` so that predictions and references
    are both ints before the metric is computed.

    Args:
        predictions: One mapping per example, each with a ``"target"`` entry.
        gold: The reference examples, each with a ``"target"`` entry (and
            ``"target_options"`` when an index conversion is required).

    Returns:
        An ordered mapping from metric key to score.

    Raises:
        ValueError: If ``predictions`` or ``gold`` is missing or empty, if
            their target types differ on a non-multiple-choice task, if an
            example lacks ``"target_options"`` when a conversion needs it,
            if a value is not among the options, or if a metric returns
            ``None``.
    """
    if predictions is None or gold is None:
        raise ValueError("Both `predictions` and `gold` must be provided.")

    def option_index(example, value):
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if "target_options" not in example:
            raise ValueError("Multiple-choice examples must provide `target_options`.")
        return example["target_options"].index(value)

    result = OrderedDict()
    for metric_config in self.config.evaluation_metrics:
        hf_id = metric_config.hf_id
        if isinstance(hf_id, str):
            hf_id = [hf_id]

        metric = evaluate.load(*hf_id, revision=metric_config.git_commit_sha)

        refs_lst = [g["target"] for g in gold]
        preds_lst = [pred["target"] for pred in predictions]
        if not refs_lst or not preds_lst:
            # The type checks below inspect the first element of each list.
            raise ValueError("`predictions` and `gold` must not be empty.")

        ref_type = type(refs_lst[0])
        pred_type = type(preds_lst[0])
        is_multiple_choice = self.config.task_type == TaskType.MULTIPLE_CHOICE

        if pred_type is not ref_type:
            if not is_multiple_choice:
                raise ValueError(
                    f"Predictions and references have different types: preds: {pred_type} and refs: {ref_type}. "
                )
            # Convert whichever side is a string to the option index used by the other side.
            if pred_type is str and ref_type is int:
                logger.warning("Predictions are strings, but references are ints. Converting predictions to ints.")
                preds_lst = [option_index(ref, pred) for pred, ref in zip(preds_lst, gold)]
            elif pred_type is int and ref_type is str:
                logger.warning("Predictions are ints, but references are strings. Converting references to ints.")
                refs_lst = [option_index(ref, ref["target"]) for ref in gold]
        elif is_multiple_choice and pred_type is not int:
            # Same type on both sides, but not int: convert both to option indices.
            logger.warning(
                "Predictions and references have the same type, but it is not int. Converting both to int."
            )
            converted_preds = []
            converted_refs = []
            for pred, ref in zip(preds_lst, gold):
                converted_preds.append(option_index(ref, pred))
                converted_refs.append(option_index(ref, ref["target"]))
            preds_lst = converted_preds
            refs_lst = converted_refs

        extra_kwargs = metric_config.compute_extra_kwargs or {}
        output: dict = metric.compute(predictions=preds_lst, references=refs_lst, **extra_kwargs)

        if output is None:
            raise ValueError(
                f"Metric {metric_config.hf_id} returned None. " f"Please check the metric implementation."
            )

        # Keep only the "score" entry and prefix its key with the metric id.
        metric_id = "_".join(hf_id)
        output = {f"hf_{metric_id}__{k}": v for k, v in output.items() if k == "score"}

        result.update(output)

    return result

def chernoff_coef(self, vec1, vec2, alpha):
    """Return the Chernoff coefficient between two (probability) distributions.

    The coefficient is ``C_alpha(P || Q) = sum_k p_k^alpha * q_k^(1 - alpha)``
    and lies in [0, 1] for normalised inputs. The alpha parameter determines
    whether we want to measure if Q includes elements that are not in P.

    Args:
        vec1: Distribution P, either a 1-D vector or a 2-D array holding one
            distribution per row.
        vec2: Distribution Q, with a shape compatible with ``vec1``.
        alpha: Weight of P in the product; must be in [0, 1].

    Returns:
        A 1-D array with one coefficient per row (length 1 for 1-D inputs).

    Raises:
        ValueError: If ``alpha`` is outside [0, 1].

    Note:
        With ``alpha`` equal to 0 or 1, a zero entry in the vector whose
        exponent is 0 produces ``-inf * 0 = nan`` in log space.
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("alpha must be in [0,1]")
    # Lift 1-D vectors to a single row so the row-wise sum below is always valid.
    p = np.atleast_2d(vec1)
    q = np.atleast_2d(vec2)
    # use log to avoid underflow; log(0) = -inf is expected for entries that
    # have zero probability, so the divide-by-zero warning is silenced.
    with np.errstate(divide="ignore"):
        log_terms = (np.log(p) * alpha) + (np.log(q) * (1 - alpha))
    return np.sum(np.exp(log_terms), axis=1)

def normalize_vector(self, vector):
    """Scale ``vector`` by its total so the entries sum to 1.

    Non-finite results of the division (for example from a zero total) are
    replaced by ``np.nan_to_num``.
    """
    total = np.sum(vector)
    scaled = np.divide(vector, total)
    return np.nan_to_num(scaled)

def divergence(self, vec1, vec2, alpha):
    """Calculate the divergence between two frequency vectors.

    Both vectors are normalised to sum to 1 and the divergence is
    ``1 - Chernoff coefficient``.
    Atom divergence is 1 - Chernoff coefficient, with alpha=0.5.
    Compound divergence is 1 - Chernoff coefficient, with alpha=0.1.

    Args:
        vec1: Frequencies of the first distribution (1-D, or 2-D with one row).
        vec2: Frequencies of the second distribution, same layout as ``vec1``.
        alpha: Chernoff weight in [0, 1].

    Returns:
        The divergence as a Python float.

    Raises:
        ValueError: If ``alpha`` is outside [0, 1] (raised by ``chernoff_coef``).
        TypeError: If the inputs hold more than one row, so the result is
            not a single scalar.
    """
    # chernoff_coef reduces over axis 1, so plain 1-D vectors are lifted to one row.
    p = np.atleast_2d(self.normalize_vector(vec1))
    q = np.atleast_2d(self.normalize_vector(vec2))
    coefficient = self.chernoff_coef(p, q, alpha)
    # Squeeze to 0-d first: float() on an array with ndim > 0 is deprecated in NumPy >= 1.25.
    return float(np.squeeze(1 - coefficient))
Empty file.
43 changes: 43 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_de/config.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
name: 'Europarl DBCA splits (comdiv0_de)',

description: 'This task aims to measure how well an NMT model generalises to a shifted distribution of
dependency relations. In practice, this means that the test set includes novel
(<head lemma>, <deprel>, <dependant lemma>) tuples (=compounds) that were not seen in
the training set, while having similar relative frequencies of the lemmas and dependency
relation tags (= elements of the compound tuples = atoms).',

keywords: [
'translation',
'dependency relations',
],

authors: [
'Anssi Moisio',
],

task_type: 'free_form',

data_source: {
type: 'hf',
hf_id: ['Anssi/europarl_dbca_splits', 'comdiv0.0_en_de'],
git_commit_sha: '0dcb7abe8e18aa520cbfcbe9141b916c684912fc'
},

evaluation_metrics: [
{
hf_id: 'chrf',
git_commit_sha: '4b119256e85de9130aa84d87247381c5acb29bc1',
best_score: 100.0,
}
],

has_validation_set: false,
has_train_set: true,

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},
},
}
3 changes: 3 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_de/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Europarl DBCA splits (comdiv0_de)

see ../doc.md
5 changes: 5 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_de/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from genbench.tasks.europarl_dbca_splits._base_task import BaseDbcaTask


class EuroparlDbcaSplitsComdiv0De(BaseDbcaTask):
    """Europarl DBCA splits subtask ``comdiv0_de``; all behaviour is inherited from ``BaseDbcaTask``."""
Empty file.
43 changes: 43 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_el/config.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
name: 'Europarl DBCA splits (comdiv0_el)',

description: 'This task aims to measure how well an NMT model generalises to a shifted distribution of
dependency relations. In practice, this means that the test set includes novel
(<head lemma>, <deprel>, <dependant lemma>) tuples (=compounds) that were not seen in
the training set, while having similar relative frequencies of the lemmas and dependency
relation tags (= elements of the compound tuples = atoms).',

keywords: [
'translation',
'dependency relations',
],

authors: [
'Anssi Moisio',
],

task_type: 'free_form',

data_source: {
type: 'hf',
hf_id: ['Anssi/europarl_dbca_splits', 'comdiv0.0_en_el'],
git_commit_sha: '0dcb7abe8e18aa520cbfcbe9141b916c684912fc'
},

evaluation_metrics: [
{
hf_id: 'chrf',
git_commit_sha: '4b119256e85de9130aa84d87247381c5acb29bc1',
best_score: 100.0,
}
],

has_validation_set: false,
has_train_set: true,

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},
},
}
3 changes: 3 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_el/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Europarl DBCA splits (comdiv0_el)

see ../doc.md
5 changes: 5 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_el/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from genbench.tasks.europarl_dbca_splits._base_task import BaseDbcaTask


class EuroparlDbcaSplitsComdiv0El(BaseDbcaTask):
    """Europarl DBCA splits subtask ``comdiv0_el``; all behaviour is inherited from ``BaseDbcaTask``."""
Empty file.
43 changes: 43 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_fi/config.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
name: 'Europarl DBCA splits (comdiv0_fi)',

description: 'This task aims to measure how well an NMT model generalises to a shifted distribution of
dependency relations. In practice, this means that the test set includes novel
(<head lemma>, <deprel>, <dependant lemma>) tuples (=compounds) that were not seen in
the training set, while having similar relative frequencies of the lemmas and dependency
relation tags (= elements of the compound tuples = atoms).',

keywords: [
'translation',
'dependency relations',
],

authors: [
'Anssi Moisio',
],

task_type: 'free_form',

data_source: {
type: 'hf',
hf_id: ['Anssi/europarl_dbca_splits', 'comdiv0.0_en_fi'],
git_commit_sha: '0dcb7abe8e18aa520cbfcbe9141b916c684912fc'
},

evaluation_metrics: [
{
hf_id: 'chrf',
git_commit_sha: '4b119256e85de9130aa84d87247381c5acb29bc1',
best_score: 100.0,
}
],

has_validation_set: false,
has_train_set: true,

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},
},
}
3 changes: 3 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_fi/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Europarl DBCA splits (comdiv0_fi)

see ../doc.md
5 changes: 5 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_fi/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from genbench.tasks.europarl_dbca_splits._base_task import BaseDbcaTask


class EuroparlDbcaSplitsComdiv0Fi(BaseDbcaTask):
    """Europarl DBCA splits subtask ``comdiv0_fi``; all behaviour is inherited from ``BaseDbcaTask``."""
Empty file.
43 changes: 43 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_fr/config.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
name: 'Europarl DBCA splits (comdiv0_fr)',

description: 'This task aims to measure how well an NMT model generalises to a shifted distribution of
dependency relations. In practice, this means that the test set includes novel
(<head lemma>, <deprel>, <dependant lemma>) tuples (=compounds) that were not seen in
the training set, while having similar relative frequencies of the lemmas and dependency
relation tags (= elements of the compound tuples = atoms).',

keywords: [
'translation',
'dependency relations',
],

authors: [
'Anssi Moisio',
],

task_type: 'free_form',

data_source: {
type: 'hf',
hf_id: ['Anssi/europarl_dbca_splits', 'comdiv0.0_en_fr'],
git_commit_sha: '0dcb7abe8e18aa520cbfcbe9141b916c684912fc'
},

evaluation_metrics: [
{
hf_id: 'chrf',
git_commit_sha: '4b119256e85de9130aa84d87247381c5acb29bc1',
best_score: 100.0,
}
],

has_validation_set: false,
has_train_set: true,

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},
},
}
3 changes: 3 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_fr/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Europarl DBCA splits (comdiv0_fr)

see ../doc.md
5 changes: 5 additions & 0 deletions src/genbench/tasks/europarl_dbca_splits/comdiv0_fr/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from genbench.tasks.europarl_dbca_splits._base_task import BaseDbcaTask


class EuroparlDbcaSplitsComdiv0Fr(BaseDbcaTask):
    """Europarl DBCA splits subtask ``comdiv0_fr``; all behaviour is inherited from ``BaseDbcaTask``."""
Empty file.
Loading

0 comments on commit 3b436b4

Please sign in to comment.