This repository has been archived by the owner on Jul 23, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #41 from GenBench/force-merge-nl_codesearch_clf
[Task Submission] Natural Language Codesearch Classification (`nl_codesearch_clf`)
- Loading branch information
Showing
38 changed files
with
1,390 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from genbench import TaskDict | ||
|
||
|
||
class NlCodesearchClf(TaskDict):
    """Task dictionary for the `nl_codesearch_clf` task; adds no behaviour of its own."""
Empty file.
58 changes: 58 additions & 0 deletions
58
src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/config.jsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
{ | ||
name: 'Natural Language Codesearch Classification (codesearchnet_adv)', | ||
|
||
description: 'Natural Language Codesearch Classification (codesearchnet_adv) aims to measure the generalization capabilities of language models in code understanding. This subtask measures robustness against covariate shifts', | ||
|
||
keywords: [ | ||
'codesearch', | ||
'natural language query', | ||
'binary classification', | ||
'python', | ||
'robustness', | ||
'covariate shift', | ||
], | ||
|
||
authors: [ | ||
'Andor Diera', | ||
'Abdelhalim Dahou', | ||
'Lukas Galke', | ||
'Fabian Karl', | ||
'Florian Sihler', | ||
'Ansgar Scherp', | ||
], | ||
|
||
data_source: { | ||
type: 'manual', | ||
test: 'https://zenodo.org/record/8310891/files/test_adv.jsonl', | ||
train:'https://zenodo.org/record/8310891/files/train_adv.jsonl', | ||
}, | ||
|
||
has_validation_set: false, | ||
has_train_set: true, | ||
|
||
task_type: 'multiple_choice', | ||
|
||
evaluation_metrics: [ | ||
{ | ||
hf_id: 'accuracy', | ||
git_commit_sha: '34d6add55811828baef83e0d7c6826e2193f7b6a', | ||
best_score: 1.0, | ||
}, | ||
], | ||
|
||
preparation_strategies: { | ||
finetuning: { | ||
objective: 'maximum_likelihood', | ||
}, | ||
|
||
prompt_based_testing: { | ||
prompt_builder: { | ||
instruction_zero_shot: 'Given a code comment and a Python programming language code snippet, determine if the comment accurately represents the function of the code. Respond with True if the code matches the comment and False if it does not. The input format is defined as comment [CODESPLIT] code', | ||
input_prefix: '', | ||
output_prefix: '', | ||
choices_prefix: '', | ||
append_choices_to_input: false, | ||
} | ||
}, | ||
}, | ||
} |
19 changes: 19 additions & 0 deletions
19
src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/doc.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Natural Language Codesearch Classification (codesearchnet_adv) | ||
|
||
## Abstract | ||
*Copy the abstract of your accompanying paper for this task here Natural Language Codesearch Classification (codesearchnet_adv).* | ||
|
||
## Examples | ||
*Give some examples of the Natural Language Codesearch Classification (codesearchnet_adv).* | ||
|
||
## Usage | ||
*Describe how to load your task and what is required for evaluation, if anything.* | ||
|
||
## Data Source | ||
*Describe the data source for this Natural Language Codesearch Classification (codesearchnet_adv).* | ||
|
||
## Limitations and Bias | ||
*Note any known limitations or biases that the Natural Language Codesearch Classification (codesearchnet_adv) has, with links and references if possible.* | ||
|
||
## GenBench Eval card | ||
*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. |
46 changes: 46 additions & 0 deletions
46
src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import random | ||
from typing import Dict | ||
|
||
import datasets | ||
|
||
from genbench import Task | ||
|
||
|
||
class NlCodesearchClfCodesearchnetAdv(Task):
    """Task class for the `codesearchnet_adv` subtask of `nl_codesearch_clf`."""

    def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
        """Load the raw splits and add one negative sample per comment/code pair.

        For the "train" and "test" splits every original row is kept and is
        directly followed by a negative row: the comment of the original row
        joined (via "[CODESPLIT]") with the code of a randomly drawn row that
        differs from the original one. Negative rows get ``target`` 0 and reuse
        the ``target_options`` of the original row. Any other split is passed
        through unchanged.

        Returns:
            A dictionary mapping the split name (e.g. "train", "test") to a
            HuggingFace `datasets.Dataset`. Both the "train" and the "test"
            split contain the original pairs interleaved with their
            distractors.

        Raises:
            ValueError: from `random.sample` if a processed split has no row
                that differs from the current one (e.g. a single-row split).
            IndexError: if the input of a sampled row does not contain the
                "[CODESPLIT]" separator.
        """
        separator = "[CODESPLIT]"
        # Load the raw datasets
        raw_datasets: Dict[str, datasets.Dataset] = self._load_data_source()
        output: Dict[str, datasets.Dataset] = {}
        # Fixed seed so the sampled distractors are the same on every run
        random.seed(42)
        for split, dataset in raw_datasets.items():
            if split not in ("train", "test"):
                output[split] = dataset
                continue

            # Decode the dataset into plain dicts once instead of re-reading it
            # for every row.
            rows = list(dataset)
            augmented = []
            for row in rows:
                # Keep the original comment-code pair
                augmented.append(row)
                # Same candidate filter and same `random.sample` call as before,
                # so the seeded choice of distractors is unchanged.
                candidates = [other for other in rows if other != row]
                distractor = random.sample(candidates, 1)[0]
                comment = row["input"].split(separator)[0]
                unrelated_code = distractor["input"].split(separator)[1]
                # Negative sample: original comment paired with unrelated code
                augmented.append(
                    {
                        "input": comment + separator + unrelated_code,
                        "target": 0,
                        "target_options": row["target_options"],
                    }
                )

            # Build the dataset in one go; calling `add_item` per row creates a
            # new Dataset object on every call.
            column_names = []
            for row in augmented:
                for name in row:
                    if name not in column_names:
                        column_names.append(name)
            output[split] = datasets.Dataset.from_dict(
                {name: [row.get(name) for row in augmented] for name in column_names}
            )
        return output
Empty file.
56 changes: 56 additions & 0 deletions
56
src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/config.jsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
{ | ||
name: 'Natural Language Codesearch Classification (codesearchnet_go)', | ||
|
||
description: 'Natural Language Codesearch Classification (codesearchnet_go) aims to measure the generalization capabilities of language models in code understanding. This subtask measures cross-lingual generalization', | ||
|
||
keywords: [ | ||
'codesearch', | ||
'natural language query', | ||
'binary classification', | ||
'go', | ||
'cross-lingual' | ||
], | ||
|
||
authors: [ | ||
'Andor Diera', | ||
'Abdelhalim Dahou', | ||
'Lukas Galke', | ||
'Fabian Karl', | ||
'Florian Sihler', | ||
'Ansgar Scherp', | ||
], | ||
|
||
data_source: { | ||
type: 'manual', | ||
test: 'https://zenodo.org/record/8310891/files/test_go.jsonl', | ||
train:'https://zenodo.org/record/8310891/files/train_adv.jsonl', | ||
}, | ||
|
||
has_validation_set: false, | ||
has_train_set: true, | ||
|
||
task_type: 'multiple_choice', | ||
|
||
evaluation_metrics: [ | ||
{ | ||
hf_id: 'accuracy', | ||
git_commit_sha: '34d6add55811828baef83e0d7c6826e2193f7b6a', | ||
best_score: 1.0, | ||
}, | ||
], | ||
|
||
preparation_strategies: { | ||
finetuning: { | ||
objective: 'maximum_likelihood', | ||
}, | ||
prompt_based_testing: { | ||
prompt_builder: { | ||
instruction_zero_shot: 'Given a code comment and a Go programming language code snippet, determine if the comment accurately represents the function of the code. Respond with True if the code matches the comment and False if it does not. The input format is defined as comment [CODESPLIT] code', | ||
input_prefix: '', | ||
output_prefix: '', | ||
choices_prefix: '', | ||
append_choices_to_input: false, | ||
} | ||
}, | ||
}, | ||
} |
19 changes: 19 additions & 0 deletions
19
src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/doc.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Natural Language Codesearch Classification (codesearchnet_go) | ||
|
||
## Abstract | ||
*Copy the abstract of your accompanying paper for this task here Natural Language Codesearch Classification (codesearchnet_go).* | ||
|
||
## Examples | ||
*Give some examples of the Natural Language Codesearch Classification (codesearchnet_go).* | ||
|
||
## Usage | ||
*Describe how to load your task and what is required for evaluation, if anything.* | ||
|
||
## Data Source | ||
*Describe the data source for this Natural Language Codesearch Classification (codesearchnet_go).* | ||
|
||
## Limitations and Bias | ||
*Note any known limitations or biases that the Natural Language Codesearch Classification (codesearchnet_go) has, with links and references if possible.* | ||
|
||
## GenBench Eval card | ||
*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. |
46 changes: 46 additions & 0 deletions
46
src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import random | ||
from typing import Dict | ||
|
||
import datasets | ||
|
||
from genbench import Task | ||
|
||
|
||
class NlCodesearchClfCodesearchnetGo(Task):
    """Task class for the `codesearchnet_go` subtask of `nl_codesearch_clf`."""

    def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
        """Load the raw splits and add one negative sample per test comment/code pair.

        For the "test" split every original row is kept and is directly
        followed by a negative row: the comment of the original row joined
        (via "[CODESPLIT]") with the code of a randomly drawn row that differs
        from the original one. Negative rows get ``target`` 0 and reuse the
        ``target_options`` of the original row. Every other split (including
        "train") is passed through unchanged.

        Returns:
            A dictionary mapping the split name (e.g. "train", "test") to a
            HuggingFace `datasets.Dataset`. Only the "test" split contains the
            distractors; the train split is the original dataset.

        Raises:
            ValueError: from `random.sample` if the test split has no row that
                differs from the current one (e.g. a single-row split).
            IndexError: if the input of a sampled row does not contain the
                "[CODESPLIT]" separator.
        """
        separator = "[CODESPLIT]"
        # Load the raw datasets
        raw_datasets: Dict[str, datasets.Dataset] = self._load_data_source()
        output: Dict[str, datasets.Dataset] = {}
        # Fixed seed so the sampled distractors are the same on every run
        random.seed(42)
        for split, dataset in raw_datasets.items():
            if split != "test":
                output[split] = dataset
                continue

            # Decode the dataset into plain dicts once instead of re-reading it
            # for every row.
            rows = list(dataset)
            augmented = []
            for row in rows:
                # Keep the original comment-code pair
                augmented.append(row)
                # Same candidate filter and same `random.sample` call as before,
                # so the seeded choice of distractors is unchanged.
                candidates = [other for other in rows if other != row]
                distractor = random.sample(candidates, 1)[0]
                comment = row["input"].split(separator)[0]
                unrelated_code = distractor["input"].split(separator)[1]
                # Negative sample: original comment paired with unrelated code
                augmented.append(
                    {
                        "input": comment + separator + unrelated_code,
                        "target": 0,
                        "target_options": row["target_options"],
                    }
                )

            # Build the dataset in one go; calling `add_item` per row creates a
            # new Dataset object on every call.
            column_names = []
            for row in augmented:
                for name in row:
                    if name not in column_names:
                        column_names.append(name)
            output[split] = datasets.Dataset.from_dict(
                {name: [row.get(name) for row in augmented] for name in column_names}
            )
        return output
Empty file.
56 changes: 56 additions & 0 deletions
56
src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/config.jsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
{ | ||
name: 'Natural Language Codesearch Classification (codesearchnet_java)', | ||
|
||
description: 'Natural Language Codesearch Classification (codesearchnet_java) aims to measure the generalization capabilities of language models in code understanding. This subtask measures cross-lingual generalization', | ||
|
||
keywords: [ | ||
'codesearch', | ||
'natural language query', | ||
'binary classification', | ||
'java', | ||
'cross-lingual' | ||
], | ||
|
||
authors: [ | ||
'Andor Diera', | ||
'Abdelhalim Dahou', | ||
'Lukas Galke', | ||
'Fabian Karl', | ||
'Florian Sihler', | ||
'Ansgar Scherp', | ||
], | ||
|
||
data_source: { | ||
type: 'manual', | ||
test: 'https://zenodo.org/record/8310891/files/test_java.jsonl', | ||
train:'https://zenodo.org/record/8310891/files/train_adv.jsonl', | ||
}, | ||
|
||
has_validation_set: false, | ||
has_train_set: true, | ||
|
||
task_type: 'multiple_choice', | ||
|
||
evaluation_metrics: [ | ||
{ | ||
hf_id: 'accuracy', | ||
git_commit_sha: '34d6add55811828baef83e0d7c6826e2193f7b6a', | ||
best_score: 1.0, | ||
}, | ||
], | ||
|
||
preparation_strategies: { | ||
finetuning: { | ||
objective: 'maximum_likelihood', | ||
}, | ||
prompt_based_testing: { | ||
prompt_builder: { | ||
instruction_zero_shot: 'Given a code comment and a Java programming language code snippet, determine if the comment accurately represents the function of the code. Respond with True if the code matches the comment and False if it does not. The input format is defined as comment [CODESPLIT] code', | ||
input_prefix: '', | ||
output_prefix: '', | ||
choices_prefix: '', | ||
append_choices_to_input: false, | ||
} | ||
}, | ||
}, | ||
} |
19 changes: 19 additions & 0 deletions
19
src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/doc.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Natural Language Codesearch Classification (codesearchnet_java) | ||
|
||
## Abstract | ||
*Copy the abstract of your accompanying paper for this task here Natural Language Codesearch Classification (codesearchnet_java).* | ||
|
||
## Examples | ||
*Give some examples of the Natural Language Codesearch Classification (codesearchnet_java).* | ||
|
||
## Usage | ||
*Describe how to load your task and what is required for evaluation, if anything.* | ||
|
||
## Data Source | ||
*Describe the data source for this Natural Language Codesearch Classification (codesearchnet_java).* | ||
|
||
## Limitations and Bias | ||
*Note any known limitations or biases that the Natural Language Codesearch Classification (codesearchnet_java) has, with links and references if possible.* | ||
|
||
## GenBench Eval card | ||
*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. |
46 changes: 46 additions & 0 deletions
46
src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import random | ||
from typing import Dict | ||
|
||
import datasets | ||
|
||
from genbench import Task | ||
|
||
|
||
class NlCodesearchClfCodesearchnetJava(Task):
    """Task class for the `codesearchnet_java` subtask of `nl_codesearch_clf`."""

    def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
        """Load the raw splits and add one negative sample per test comment/code pair.

        For the "test" split every original row is kept and is directly
        followed by a negative row: the comment of the original row joined
        (via "[CODESPLIT]") with the code of a randomly drawn row that differs
        from the original one. Negative rows get ``target`` 0 and reuse the
        ``target_options`` of the original row. Every other split (including
        "train") is passed through unchanged.

        Returns:
            A dictionary mapping the split name (e.g. "train", "test") to a
            HuggingFace `datasets.Dataset`. Only the "test" split contains the
            distractors; the train split is the original dataset.

        Raises:
            ValueError: from `random.sample` if the test split has no row that
                differs from the current one (e.g. a single-row split).
            IndexError: if the input of a sampled row does not contain the
                "[CODESPLIT]" separator.
        """
        separator = "[CODESPLIT]"
        # Load the raw datasets
        raw_datasets: Dict[str, datasets.Dataset] = self._load_data_source()
        output: Dict[str, datasets.Dataset] = {}
        # Fixed seed so the sampled distractors are the same on every run
        random.seed(42)
        for split, dataset in raw_datasets.items():
            if split != "test":
                output[split] = dataset
                continue

            # Decode the dataset into plain dicts once instead of re-reading it
            # for every row.
            rows = list(dataset)
            augmented = []
            for row in rows:
                # Keep the original comment-code pair
                augmented.append(row)
                # Same candidate filter and same `random.sample` call as before,
                # so the seeded choice of distractors is unchanged.
                candidates = [other for other in rows if other != row]
                distractor = random.sample(candidates, 1)[0]
                comment = row["input"].split(separator)[0]
                unrelated_code = distractor["input"].split(separator)[1]
                # Negative sample: original comment paired with unrelated code
                augmented.append(
                    {
                        "input": comment + separator + unrelated_code,
                        "target": 0,
                        "target_options": row["target_options"],
                    }
                )

            # Build the dataset in one go; calling `add_item` per row creates a
            # new Dataset object on every call.
            column_names = []
            for row in augmented:
                for name in row:
                    if name not in column_names:
                        column_names.append(name)
            output[split] = datasets.Dataset.from_dict(
                {name: [row.get(name) for row in augmented] for name in column_names}
            )
        return output
Empty file.
Oops, something went wrong.