Skip to content
This repository has been archived by the owner on Jul 23, 2024. It is now read-only.

Commit

Permalink
Merge pull request #41 from GenBench/force-merge-nl_codesearch_clf
Browse files Browse the repository at this point in the history
[Task Submission] Natural Language Codesearch Classification (`nl_codesearch_clf`)
  • Loading branch information
kazemnejad authored Dec 31, 2023
2 parents 382adcf + aed4787 commit d4db498
Show file tree
Hide file tree
Showing 38 changed files with 1,390 additions and 0 deletions.
Binary file not shown.
5 changes: 5 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from genbench import TaskDict


class NlCodesearchClf(TaskDict):
    """Task dictionary for `nl_codesearch_clf`.

    The class adds nothing of its own; all behaviour is inherited from
    `TaskDict`.
    """
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
name: 'Natural Language Codesearch Classification (codesearchnet_adv)',

description: 'Natural Language Codesearch Classification (codesearchnet_adv) aims to measure the generalization capabilities of language models in code understanding. This subtask measures robustness against covariate shifts.',

keywords: [
'codesearch',
'natural language query',
'binary classification',
'python',
'robustness',
'covariate shift',
],

authors: [
'Andor Diera',
'Abdelhalim Dahou',
'Lukas Galke',
'Fabian Karl',
'Florian Sihler',
'Ansgar Scherp',
],

data_source: {
type: 'manual',
test: 'https://zenodo.org/record/8310891/files/test_adv.jsonl',
train:'https://zenodo.org/record/8310891/files/train_adv.jsonl',
},

has_validation_set: false,
has_train_set: true,

task_type: 'multiple_choice',

evaluation_metrics: [
{
hf_id: 'accuracy',
git_commit_sha: '34d6add55811828baef83e0d7c6826e2193f7b6a',
best_score: 1.0,
},
],

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},

prompt_based_testing: {
prompt_builder: {
instruction_zero_shot: 'Given a code comment and a Python programming language code snippet, determine if the comment accurately represents the function of the code. Respond with True if the code matches the comment and False if it does not. The input format is defined as comment [CODESPLIT] code',
input_prefix: '',
output_prefix: '',
choices_prefix: '',
append_choices_to_input: false,
}
},
},
}
19 changes: 19 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Natural Language Codesearch Classification (codesearchnet_adv)

## Abstract
*Copy the abstract of the paper accompanying the Natural Language Codesearch Classification (codesearchnet_adv) task here.*

## Examples
*Give some examples of the Natural Language Codesearch Classification (codesearchnet_adv).*

## Usage
*Describe how to load your task and what is required for evaluation, if anything.*

## Data Source
*Describe the data source for this Natural Language Codesearch Classification (codesearchnet_adv).*

## Limitations and Bias
*Note any known limitations or biases that the Natural Language Codesearch Classification (codesearchnet_adv) has, with links and references if possible.*

## GenBench Eval card
*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*.
46 changes: 46 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import random
from typing import Dict

import datasets

from genbench import Task


class NlCodesearchClfCodesearchnetAdv(Task):
    """Task that augments its raw splits with mismatched comment/code pairs."""

    def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
        """Load the raw splits and add one negative sample per comment/code pair.

        For every item of the "train" and "test" splits the original item is
        kept and immediately followed by a distractor: the item's comment
        joined via ``[CODESPLIT]`` with the code part of another, randomly
        chosen item of the same split. The distractor gets ``target`` 0 and
        reuses the original item's ``target_options``. Any other split is
        passed through unchanged.

        Returns:
            A dictionary mapping the split name (e.g. "train", "test") to a
            HuggingFace `datasets.Dataset`. The "train" and "test" datasets
            contain the original pairs interleaved with their distractors.
        """
        # Load the raw datasets
        raw_datasets: Dict[str, datasets.Dataset] = self._load_data_source()
        output: Dict[str, datasets.Dataset] = {}
        # Set random seed for consistency
        random.seed(42)
        for split, dataset in raw_datasets.items():
            if split not in ("test", "train"):
                output[split] = dataset
                continue

            # Decode the split into Python dicts once instead of re-iterating
            # the dataset for every single item.
            items = list(dataset)
            new_dataset = datasets.Dataset.from_dict({})
            for item in items:
                # Add comment-code pair to new dataset
                new_dataset = new_dataset.add_item(item)
                # NOTE: this per-item filter is still quadratic. It is kept
                # deliberately so that the population handed to the seeded
                # `random.sample` call, and therefore the generated
                # distractors, stay exactly the same.
                other_items = [other for other in items if other != item]
                # Randomly select other item
                random_item = random.sample(other_items, 1)[0]
                # Comment of the current item, code of the random item
                comment = item["input"].split("[CODESPLIT]")[0]
                random_code = random_item["input"].split("[CODESPLIT]")[1]
                new_item = {
                    "input": comment + "[CODESPLIT]" + random_code,
                    "target": 0,
                    "target_options": item["target_options"],
                }
                # Add negative sample comment-code pair to new dataset
                new_dataset = new_dataset.add_item(new_item)
            output[split] = new_dataset
        return output
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
name: 'Natural Language Codesearch Classification (codesearchnet_go)',

description: 'Natural Language Codesearch Classification (codesearchnet_go) aims to measure the generalization capabilities of language models in code understanding. This subtask measures cross-lingual generalization.',

keywords: [
'codesearch',
'natural language query',
'binary classification',
'go',
'cross-lingual'
],

authors: [
'Andor Diera',
'Abdelhalim Dahou',
'Lukas Galke',
'Fabian Karl',
'Florian Sihler',
'Ansgar Scherp',
],

data_source: {
type: 'manual',
test: 'https://zenodo.org/record/8310891/files/test_go.jsonl',
train:'https://zenodo.org/record/8310891/files/train_adv.jsonl',
},

has_validation_set: false,
has_train_set: true,

task_type: 'multiple_choice',

evaluation_metrics: [
{
hf_id: 'accuracy',
git_commit_sha: '34d6add55811828baef83e0d7c6826e2193f7b6a',
best_score: 1.0,
},
],

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},
prompt_based_testing: {
prompt_builder: {
instruction_zero_shot: 'Given a code comment and a Go programming language code snippet, determine if the comment accurately represents the function of the code. Respond with True if the code matches the comment and False if it does not. The input format is defined as comment [CODESPLIT] code',
input_prefix: '',
output_prefix: '',
choices_prefix: '',
append_choices_to_input: false,
}
},
},
}
19 changes: 19 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Natural Language Codesearch Classification (codesearchnet_go)

## Abstract
*Copy the abstract of the paper accompanying the Natural Language Codesearch Classification (codesearchnet_go) task here.*

## Examples
*Give some examples of the Natural Language Codesearch Classification (codesearchnet_go).*

## Usage
*Describe how to load your task and what is required for evaluation, if anything.*

## Data Source
*Describe the data source for this Natural Language Codesearch Classification (codesearchnet_go).*

## Limitations and Bias
*Note any known limitations or biases that the Natural Language Codesearch Classification (codesearchnet_go) has, with links and references if possible.*

## GenBench Eval card
*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*.
46 changes: 46 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import random
from typing import Dict

import datasets

from genbench import Task


class NlCodesearchClfCodesearchnetGo(Task):
    """Task that augments its test split with mismatched comment/code pairs."""

    def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
        """Load the raw splits and add one negative sample per test pair.

        For every item of the "test" split the original item is kept and
        immediately followed by a distractor: the item's comment joined via
        ``[CODESPLIT]`` with the code part of another, randomly chosen item of
        the same split. The distractor gets ``target`` 0 and reuses the
        original item's ``target_options``. Every other split (e.g. "train")
        is passed through unchanged.

        Returns:
            A dictionary mapping the split name (e.g. "train", "test") to a
            HuggingFace `datasets.Dataset`. Only the "test" dataset contains
            distractors.
        """
        # Load the raw datasets
        raw_datasets: Dict[str, datasets.Dataset] = self._load_data_source()
        output: Dict[str, datasets.Dataset] = {}
        # Set random seed for consistency
        random.seed(42)
        for split, dataset in raw_datasets.items():
            if split != "test":
                output[split] = dataset
                continue

            # Decode the split into Python dicts once instead of re-iterating
            # the dataset for every single item.
            items = list(dataset)
            new_dataset = datasets.Dataset.from_dict({})
            for item in items:
                # Add comment-code pair to new dataset
                new_dataset = new_dataset.add_item(item)
                # NOTE: this per-item filter is still quadratic. It is kept
                # deliberately so that the population handed to the seeded
                # `random.sample` call, and therefore the generated
                # distractors, stay exactly the same.
                other_items = [other for other in items if other != item]
                # Randomly select other item
                random_item = random.sample(other_items, 1)[0]
                # Comment of the current item, code of the random item
                comment = item["input"].split("[CODESPLIT]")[0]
                random_code = random_item["input"].split("[CODESPLIT]")[1]
                new_item = {
                    "input": comment + "[CODESPLIT]" + random_code,
                    "target": 0,
                    "target_options": item["target_options"],
                }
                # Add negative sample comment-code pair to new dataset
                new_dataset = new_dataset.add_item(new_item)
            output[split] = new_dataset
        return output
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
name: 'Natural Language Codesearch Classification (codesearchnet_java)',

description: 'Natural Language Codesearch Classification (codesearchnet_java) aims to measure the generalization capabilities of language models in code understanding. This subtask measures cross-lingual generalization.',

keywords: [
'codesearch',
'natural language query',
'binary classification',
'java',
'cross-lingual'
],

authors: [
'Andor Diera',
'Abdelhalim Dahou',
'Lukas Galke',
'Fabian Karl',
'Florian Sihler',
'Ansgar Scherp',
],

data_source: {
type: 'manual',
test: 'https://zenodo.org/record/8310891/files/test_java.jsonl',
train:'https://zenodo.org/record/8310891/files/train_adv.jsonl',
},

has_validation_set: false,
has_train_set: true,

task_type: 'multiple_choice',

evaluation_metrics: [
{
hf_id: 'accuracy',
git_commit_sha: '34d6add55811828baef83e0d7c6826e2193f7b6a',
best_score: 1.0,
},
],

preparation_strategies: {
finetuning: {
objective: 'maximum_likelihood',
},
prompt_based_testing: {
prompt_builder: {
instruction_zero_shot: 'Given a code comment and a Java programming language code snippet, determine if the comment accurately represents the function of the code. Respond with True if the code matches the comment and False if it does not. The input format is defined as comment [CODESPLIT] code',
input_prefix: '',
output_prefix: '',
choices_prefix: '',
append_choices_to_input: false,
}
},
},
}
19 changes: 19 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Natural Language Codesearch Classification (codesearchnet_java)

## Abstract
*Copy the abstract of the paper accompanying the Natural Language Codesearch Classification (codesearchnet_java) task here.*

## Examples
*Give some examples of the Natural Language Codesearch Classification (codesearchnet_java).*

## Usage
*Describe how to load your task and what is required for evaluation, if anything.*

## Data Source
*Describe the data source for this Natural Language Codesearch Classification (codesearchnet_java).*

## Limitations and Bias
*Note any known limitations or biases that the Natural Language Codesearch Classification (codesearchnet_java) has, with links and references if possible.*

## GenBench Eval card
*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*.
46 changes: 46 additions & 0 deletions src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import random
from typing import Dict

import datasets

from genbench import Task


class NlCodesearchClfCodesearchnetJava(Task):
    """Task that augments its test split with mismatched comment/code pairs."""

    def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
        """Load the raw splits and add one negative sample per test pair.

        For every item of the "test" split the original item is kept and
        immediately followed by a distractor: the item's comment joined via
        ``[CODESPLIT]`` with the code part of another, randomly chosen item of
        the same split. The distractor gets ``target`` 0 and reuses the
        original item's ``target_options``. Every other split (e.g. "train")
        is passed through unchanged.

        Returns:
            A dictionary mapping the split name (e.g. "train", "test") to a
            HuggingFace `datasets.Dataset`. Only the "test" dataset contains
            distractors.
        """
        # Load the raw datasets
        raw_datasets: Dict[str, datasets.Dataset] = self._load_data_source()
        output: Dict[str, datasets.Dataset] = {}
        # Set random seed for consistency
        random.seed(42)
        for split, dataset in raw_datasets.items():
            if split != "test":
                output[split] = dataset
                continue

            # Decode the split into Python dicts once instead of re-iterating
            # the dataset for every single item.
            items = list(dataset)
            new_dataset = datasets.Dataset.from_dict({})
            for item in items:
                # Add comment-code pair to new dataset
                new_dataset = new_dataset.add_item(item)
                # NOTE: this per-item filter is still quadratic. It is kept
                # deliberately so that the population handed to the seeded
                # `random.sample` call, and therefore the generated
                # distractors, stay exactly the same.
                other_items = [other for other in items if other != item]
                # Randomly select other item
                random_item = random.sample(other_items, 1)[0]
                # Comment of the current item, code of the random item
                comment = item["input"].split("[CODESPLIT]")[0]
                random_code = random_item["input"].split("[CODESPLIT]")[1]
                new_item = {
                    "input": comment + "[CODESPLIT]" + random_code,
                    "target": 0,
                    "target_options": item["target_options"],
                }
                # Add negative sample comment-code pair to new dataset
                new_dataset = new_dataset.add_item(new_item)
            output[split] = new_dataset
        return output
Empty file.
Loading

0 comments on commit d4db498

Please sign in to comment.