Skip to content
This repository has been archived by the owner on Jul 23, 2024. It is now read-only.

[Task Submission] Quantifier Understanding (quantifier_understanding) #18

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
49 changes: 49 additions & 0 deletions src/genbench/tasks/quantifier_understanding/config.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// GenBench task configuration for the "Quantifier Understanding" task.
// The task is test-only (no train or validation split) and is scored by exact
// string match against the one-word target of each example.
{
  name: 'Quantifier Understanding',
  // NOTE: this string literal spans several source lines, so the resulting
  // value contains the embedded line breaks exactly as written.
  description: 'The task evaluates generalization in the understanding of quantifiers. It aims to measure
how well can language models capture the semantics of logical quantifiers in language.
',
  keywords: [
    'quantifiers',
    'LLM',
    'prompting',
    'semantics',
  ],

  authors: [
    'Leroy Wang',
  ],

  data_source: {
    type: 'manual',
    // Must be a direct link to the file contents. A github.com ".../blob/..."
    // link returns the HTML viewer page instead of the JSONL records, so the
    // raw.githubusercontent.com form of the same branch and path is used.
    test: 'https://raw.githubusercontent.com/lerow/genbench_cbt/quantifier_understanding/src/genbench/tasks/quantifier_understanding/test_data.jsonl',
  },

  // Only a test split is provided.
  has_validation_set: false,
  has_train_set: false,

  task_type: 'free_form',

  evaluation_metrics: [
    {
      hf_id: 'exact_match',
      git_commit_sha: '758135da6a37ce962b7bc38c6dd5eab672d2b742',
      best_score: 1.0,
    },
  ],

  preparation_strategies: {
    // A recipe for preparing the model to perform the task by configuring its prompt.
    // This recipe is suitable for generative LMs such as GPT-3, OPT, T5, etc.
    // We provide a few options for configuring the prompt. But, the task creator can
    // also provide a custom prompt preparation in the task's Python class.
    prompt_based_testing: {
      prompt_builder: {
        instruction_zero_shot: '',  // Left empty because the prompt is in the data
        instruction_few_shot: '',  // Left empty because the prompt is in the data
        input_prefix: 'Q: ',
        output_prefix: '\nA: ',
      },
    },
  },
}
100 changes: 100 additions & 0 deletions src/genbench/tasks/quantifier_understanding/test_data.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{"input": "There are 10 tables. 7 of the tables are blue. 3 of the tables are red. Are less than half of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 6 of the tables are blue. 4 of the tables are red. Are less than half of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 5 of the tables are blue. 5 of the tables are red. Are less than half of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 4 of the tables are blue. 6 of the tables are red. Are less than half of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 3 of the tables are blue. 7 of the tables are red. Are less than half of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 7 of the chairs are blue. 3 of the chairs are red. Are less than half of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 6 of the chairs are blue. 4 of the chairs are red. Are less than half of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 5 of the chairs are blue. 5 of the chairs are red. Are less than half of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 4 of the chairs are blue. 6 of the chairs are red. Are less than half of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 3 of the chairs are blue. 7 of the chairs are red. Are less than half of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 7 of the circles are blue. 3 of the circles are red. Are less than half of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 6 of the circles are blue. 4 of the circles are red. Are less than half of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 5 of the circles are blue. 5 of the circles are red. Are less than half of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 4 of the circles are blue. 6 of the circles are red. Are less than half of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 3 of the circles are blue. 7 of the circles are red. Are less than half of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 7 of the squares are blue. 3 of the squares are red. Are less than half of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 6 of the squares are blue. 4 of the squares are red. Are less than half of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 5 of the squares are blue. 5 of the squares are red. Are less than half of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 4 of the squares are blue. 6 of the squares are red. Are less than half of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 3 of the squares are blue. 7 of the squares are red. Are less than half of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 7 of the apples are blue. 3 of the apples are red. Are less than half of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 6 of the apples are blue. 4 of the apples are red. Are less than half of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 5 of the apples are blue. 5 of the apples are red. Are less than half of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 4 of the apples are blue. 6 of the apples are red. Are less than half of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 3 of the apples are blue. 7 of the apples are red. Are less than half of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 7 of the tables are blue. 3 of the tables are red. Are more than half of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 6 of the tables are blue. 4 of the tables are red. Are more than half of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 5 of the tables are blue. 5 of the tables are red. Are more than half of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 4 of the tables are blue. 6 of the tables are red. Are more than half of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 3 of the tables are blue. 7 of the tables are red. Are more than half of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 7 of the chairs are blue. 3 of the chairs are red. Are more than half of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 6 of the chairs are blue. 4 of the chairs are red. Are more than half of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 5 of the chairs are blue. 5 of the chairs are red. Are more than half of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 4 of the chairs are blue. 6 of the chairs are red. Are more than half of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 3 of the chairs are blue. 7 of the chairs are red. Are more than half of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 7 of the circles are blue. 3 of the circles are red. Are more than half of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 6 of the circles are blue. 4 of the circles are red. Are more than half of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 5 of the circles are blue. 5 of the circles are red. Are more than half of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 4 of the circles are blue. 6 of the circles are red. Are more than half of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 3 of the circles are blue. 7 of the circles are red. Are more than half of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 7 of the squares are blue. 3 of the squares are red. Are more than half of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 6 of the squares are blue. 4 of the squares are red. Are more than half of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 5 of the squares are blue. 5 of the squares are red. Are more than half of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 4 of the squares are blue. 6 of the squares are red. Are more than half of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 3 of the squares are blue. 7 of the squares are red. Are more than half of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 7 of the apples are blue. 3 of the apples are red. Are more than half of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 6 of the apples are blue. 4 of the apples are red. Are more than half of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 5 of the apples are blue. 5 of the apples are red. Are more than half of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 4 of the apples are blue. 6 of the apples are red. Are more than half of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 3 of the apples are blue. 7 of the apples are red. Are more than half of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 7 of the tables are blue. 3 of the tables are red. Are at least 4 of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 6 of the tables are blue. 4 of the tables are red. Are at least 4 of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 5 of the tables are blue. 5 of the tables are red. Are at least 4 of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 4 of the tables are blue. 6 of the tables are red. Are at least 4 of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 3 of the tables are blue. 7 of the tables are red. Are at least 4 of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 7 of the chairs are blue. 3 of the chairs are red. Are at least 4 of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 6 of the chairs are blue. 4 of the chairs are red. Are at least 4 of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 5 of the chairs are blue. 5 of the chairs are red. Are at least 4 of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 4 of the chairs are blue. 6 of the chairs are red. Are at least 4 of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 3 of the chairs are blue. 7 of the chairs are red. Are at least 4 of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 7 of the circles are blue. 3 of the circles are red. Are at least 4 of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 6 of the circles are blue. 4 of the circles are red. Are at least 4 of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 5 of the circles are blue. 5 of the circles are red. Are at least 4 of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 4 of the circles are blue. 6 of the circles are red. Are at least 4 of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 3 of the circles are blue. 7 of the circles are red. Are at least 4 of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 7 of the squares are blue. 3 of the squares are red. Are at least 4 of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 6 of the squares are blue. 4 of the squares are red. Are at least 4 of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 5 of the squares are blue. 5 of the squares are red. Are at least 4 of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 4 of the squares are blue. 6 of the squares are red. Are at least 4 of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 3 of the squares are blue. 7 of the squares are red. Are at least 4 of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 7 of the apples are blue. 3 of the apples are red. Are at least 4 of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 6 of the apples are blue. 4 of the apples are red. Are at least 4 of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 5 of the apples are blue. 5 of the apples are red. Are at least 4 of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 4 of the apples are blue. 6 of the apples are red. Are at least 4 of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 3 of the apples are blue. 7 of the apples are red. Are at least 4 of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 7 of the tables are blue. 3 of the tables are red. Are at most 4 of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 6 of the tables are blue. 4 of the tables are red. Are at most 4 of the tables red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 tables. 5 of the tables are blue. 5 of the tables are red. Are at most 4 of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 4 of the tables are blue. 6 of the tables are red. Are at most 4 of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 tables. 3 of the tables are blue. 7 of the tables are red. Are at most 4 of the tables red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 7 of the chairs are blue. 3 of the chairs are red. Are at most 4 of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 6 of the chairs are blue. 4 of the chairs are red. Are at most 4 of the chairs red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 chairs. 5 of the chairs are blue. 5 of the chairs are red. Are at most 4 of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 4 of the chairs are blue. 6 of the chairs are red. Are at most 4 of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 chairs. 3 of the chairs are blue. 7 of the chairs are red. Are at most 4 of the chairs red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 7 of the circles are blue. 3 of the circles are red. Are at most 4 of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 6 of the circles are blue. 4 of the circles are red. Are at most 4 of the circles red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 circles. 5 of the circles are blue. 5 of the circles are red. Are at most 4 of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 4 of the circles are blue. 6 of the circles are red. Are at most 4 of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 circles. 3 of the circles are blue. 7 of the circles are red. Are at most 4 of the circles red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 7 of the squares are blue. 3 of the squares are red. Are at most 4 of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 6 of the squares are blue. 4 of the squares are red. Are at most 4 of the squares red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 squares. 5 of the squares are blue. 5 of the squares are red. Are at most 4 of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 4 of the squares are blue. 6 of the squares are red. Are at most 4 of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 squares. 3 of the squares are blue. 7 of the squares are red. Are at most 4 of the squares red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 7 of the apples are blue. 3 of the apples are red. Are at most 4 of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 6 of the apples are blue. 4 of the apples are red. Are at most 4 of the apples red? Answer with only one word, true or false.", "target": "true"}
{"input": "There are 10 apples. 5 of the apples are blue. 5 of the apples are red. Are at most 4 of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 4 of the apples are blue. 6 of the apples are red. Are at most 4 of the apples red? Answer with only one word, true or false.", "target": "false"}
{"input": "There are 10 apples. 3 of the apples are blue. 7 of the apples are red. Are at most 4 of the apples red? Answer with only one word, true or false.", "target": "false"}
Loading