diff --git a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/task.py b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/task.py
index 57bf53a..4e77608 100644
--- a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_adv/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/task.py b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/task.py
index 69eb7b5..9b880ec 100644
--- a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_go/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/task.py b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/task.py
index 80ff813..292e74c 100644
--- a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_java/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_javascript/task.py b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_javascript/task.py
index 0aa2a35..5e201a4 100644
--- a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_javascript/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_javascript/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
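The same one-character change is applied to each of the language-specific task.py files in this patch. The surrounding code is not shown in these hunks, but random_item is presumably produced by a call such as random.sample(other_items, 1), which returns a one-element list rather than a single example, so the example dict has to be reached through index [0] before the "input" lookup. A minimal sketch of the difference, under that assumption:

    import random

    dataset = [{"input": "sort a list [CODESPLIT] def sort(xs): return sorted(xs)"}]
    random_item = random.sample(dataset, 1)  # a list containing one dict, not a dict
    # random_item["input"] raises TypeError: list indices must be integers or slices
    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")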
diff --git a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_php/task.py b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_php/task.py
index afa9bb3..1378ff0 100644
--- a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_php/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_php/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_ruby/task.py b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_ruby/task.py
index 8941ffc..7f4db9b 100644
--- a/src/genbench/tasks/nl_codesearch_clf/codesearchnet_ruby/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/codesearchnet_ruby/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/cosqa/task.py b/src/genbench/tasks/nl_codesearch_clf/cosqa/task.py
index 8685f27..7d1c292 100644
--- a/src/genbench/tasks/nl_codesearch_clf/cosqa/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/cosqa/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/requirements-usage-example.txt b/src/genbench/tasks/nl_codesearch_clf/requirements-usage-example.txt
new file mode 100644
index 0000000..0a35dc8
--- /dev/null
+++ b/src/genbench/tasks/nl_codesearch_clf/requirements-usage-example.txt
@@ -0,0 +1,5 @@
+torch v. 2.1.0
+numpy v. 1.25.1
+tqdm v. 4.65.0
+transformers v. 4.32.0
+scikit-learn v. 1.3.0
\ No newline at end of file
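A note on requirements-usage-example.txt: the "package v. X.Y.Z" lines appear to document the versions the usage example was written against, but they are not in pip's requirements syntax; to install the same versions directly, the equivalent pins would be torch==2.1.0, numpy==1.25.1, tqdm==4.65.0, transformers==4.32.0 and scikit-learn==1.3.0.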
diff --git a/src/genbench/tasks/nl_codesearch_clf/statcodesearch/task.py b/src/genbench/tasks/nl_codesearch_clf/statcodesearch/task.py
index b0af739..5134760 100644
--- a/src/genbench/tasks/nl_codesearch_clf/statcodesearch/task.py
+++ b/src/genbench/tasks/nl_codesearch_clf/statcodesearch/task.py
@@ -34,7 +34,7 @@ def get_dataset_raw(self) -> Dict[str, datasets.Dataset]:
                     # Split input into comment and code
                     input_parts = item["input"].split("[CODESPLIT]")
                     # Split random input into comment and code
-                    random_input_parts = random_item["input"].split("[CODESPLIT]")
+                    random_input_parts = random_item[0]["input"].split("[CODESPLIT]")
                     # Combine the "input" fields of the original and random items
                     new_input = input_parts[0] + "[CODESPLIT]" + random_input_parts[1]
                     new_item = {"input": new_input, "target": 0, "target_options": item["target_options"]}
diff --git a/src/genbench/tasks/nl_codesearch_clf/usage_example.py b/src/genbench/tasks/nl_codesearch_clf/usage_example.py
new file mode 100644
index 0000000..4683d98
--- /dev/null
+++ b/src/genbench/tasks/nl_codesearch_clf/usage_example.py
@@ -0,0 +1,400 @@
+import argparse
+import json
+import logging
+from pathlib import Path
+
+import torch
+from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, PreTrainedModel, get_scheduler
+
+from genbench import TaskConfig
+from genbench.tasks.nl_codesearch_clf.codesearchnet_adv.task import NlCodesearchClfCodesearchnetAdv
+from genbench.tasks.nl_codesearch_clf.codesearchnet_go.task import NlCodesearchClfCodesearchnetGo
+from genbench.tasks.nl_codesearch_clf.codesearchnet_java.task import NlCodesearchClfCodesearchnetJava
+from genbench.tasks.nl_codesearch_clf.codesearchnet_javascript.task import NlCodesearchClfCodesearchnetJavascript
+from genbench.tasks.nl_codesearch_clf.codesearchnet_php.task import NlCodesearchClfCodesearchnetPhp
+from genbench.tasks.nl_codesearch_clf.codesearchnet_ruby.task import NlCodesearchClfCodesearchnetRuby
+from genbench.tasks.nl_codesearch_clf.cosqa.task import NlCodesearchClfCosqa
+from genbench.tasks.nl_codesearch_clf.statcodesearch.task import NlCodesearchClfStatcodesearch
+
+
+##########################################################
+# Data Loading Utils
+##########################################################
+class Dataset(torch.utils.data.Dataset):
+    def __init__(self, features):
+        self.features = features
+
+    def __getitem__(self, index):
+        return self.features[index]
+
+    def __len__(self):
+        return len(self.features)
+
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+    """Truncates a sequence pair in place to the maximum length."""
+
+    while True:
+        total_length = len(tokens_a) + len(tokens_b)
+        if total_length <= max_length:
+            break
+        if len(tokens_a) > len(tokens_b):
+            tokens_a.pop()
+        else:
+            tokens_b.pop()
+
+
+def _convert_examples_to_features(
+    comments,
+    codes,
+    labels,
+    max_seq_length,
+    tokenizer,
+    cls_token="[CLS]",
+    sep_token="[SEP]",
+    pad_token=0,
+    eos_token="",
+    sequence_a_segment_id=0,
+    sequence_b_segment_id=1,
+    cls_token_segment_id=1,
+    pad_token_segment_id=0,
+    mask_padding_with_zero=True,
+):
+    features = []
+    for ex_index, (comment, code, label) in enumerate(zip(comments, codes, labels)):
+        # As was done in CodeBERT
+        tokens_comment = tokenizer.tokenize(comment)[:50]
+        tokens_code = tokenizer.tokenize(code)
+
+        # update max_seq_length to account for [CLS], [SEP], [SEP] tokens (-3)
+        n_special_tokens = 3
+        if cls_token is None:
+            n_special_tokens -= 1
+        s_max_seq_length = max_seq_length - n_special_tokens
+        _truncate_seq_pair(tokens_comment, tokens_code, s_max_seq_length)
+
+        # change sep for eos if no sep_token
+        if sep_token is None:
+            sep_token = eos_token
+
+        # [SEP] in between and at the end
+        tokens = tokens_comment + [sep_token] + tokens_code + [sep_token]
+        # CLS at the beginning
+        if cls_token is not None:
+            tokens = [cls_token] + tokens
+
+        input_ids = tokenizer.convert_tokens_to_ids(tokens)
+
+        # 1 for tokens, 0 for padding
+        input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)
+
+        # padding with 0 up to max_seq_length
+        padding_length = max_seq_length - len(input_ids)
+        input_ids = input_ids + ([pad_token] * padding_length)
+        input_mask = input_mask + ([0 if mask_padding_with_zero else 1] * padding_length)
+
+        # check
+        assert len(input_ids) == max_seq_length
+        assert len(input_mask) == max_seq_length
+
+        # convert to tensors
+        input_ids = torch.tensor(input_ids, dtype=torch.long)
+        input_mask = torch.tensor(input_mask, dtype=torch.long)
+        label = torch.tensor(label, dtype=torch.long)
+
+        features.append({"input_ids": input_ids, "attention_mask": input_mask, "labels": label})
+    return features
+
+
+def load_data(tokenizer, batch_size, seq_len, train_file, is_train):
+    # create dataset
+    comments = []
+    codes = []
+    labels = []
+    skipped = 0
+
+    is_sep_token_set = tokenizer.sep_token is not None
+    is_cls_token_set = tokenizer.cls_token is not None
+    is_pad_token_set = tokenizer.pad_token is not None
+    is_eos_token_set = tokenizer.eos_token is not None
+
+    for split, dataset in train_file.items():
+        if is_train and split == "test":
+            continue
+        if not is_train and split == "train":
+            continue
+        for sample in dataset:
+            try:
+                input = sample["input"]
+                # split at [CODESPLIT] token
+                input = input.split("[CODESPLIT]")
+                if len(input) != 2:
+                    # skip cases with more than one [SEP] token
+                    logging.warning(f"Input contains more than one [CODESPLIT] token: {input}")
+                    skipped += 1
+                    continue
+                # skip every sample that contains special tokens
+                if is_sep_token_set and (tokenizer.sep_token in input[0] or tokenizer.sep_token in input[1]):
+                    logging.warning(f"Input contains special tokens: {input}")
+                    skipped += 1
+                    continue
+                if is_cls_token_set and (tokenizer.cls_token in input[0] or tokenizer.cls_token in input[1]):
+                    logging.warning(f"Input contains special tokens: {input}")
+                    skipped += 1
+                    continue
+                if is_pad_token_set and (tokenizer.pad_token in input[0] or tokenizer.pad_token in input[1]):
+                    logging.warning(f"Input contains special tokens: {input}")
+                    skipped += 1
+                    continue
+                if is_eos_token_set and (tokenizer.eos_token in input[0] or tokenizer.eos_token in input[1]):
+                    logging.warning(f"Input contains special tokens: {input}")
+                    skipped += 1
+                    continue
+                comments.append(input[0])
+                codes.append(input[1])
+                labels.append(sample["target"])
+            except json.JSONDecodeError as e:
+                print(f"Error: JSON decoding failed - {e}")
+                continue
+    logging.info(f"Skipped {skipped} samples due to special tokens")
+    print("siker")
+    # tokenize
+    features = _convert_examples_to_features(
+        comments,
+        codes,
+        labels,
+        max_seq_length=seq_len,
+        tokenizer=tokenizer,
+        cls_token=tokenizer.cls_token,
+        sep_token=tokenizer.sep_token,
+        cls_token_segment_id=tokenizer.cls_token_id,
+        pad_token_segment_id=tokenizer.pad_token_id,
+        eos_token=tokenizer.eos_token,
+    )
+    # Convert to Dataset
+    features = Dataset(features)
+
+    return DataLoader(features, batch_size=batch_size, shuffle=True)
+
+
+##############################################################
+# Fine-tune Model
+##############################################################
+
+
+def train(model: PreTrainedModel, dataloader: DataLoader, args: argparse.Namespace):
+    """
+    Fine-tune the model.
+    :param model: the pretrained model to be fine-tuned
+    :param dataloader: an iterable data loader
+    :param args: training arguments (and also some other arguments)
+    :return: the fine-tuned model
+    """
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    model.train()
+
+    num_training_steps = args.epochs * len(dataloader)
+    progress_bar = tqdm(range(num_training_steps))
+
+    optimizer = AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
+    lr_scheduler = get_scheduler(
+        name="linear",
+        optimizer=optimizer,
+        num_warmup_steps=args.num_warmup_steps,
+        num_training_steps=num_training_steps,
+    )
+
+    for epoch in range(args.epochs):
+        for batch in dataloader:
+            batch = {k: v.to(device) for k, v in batch.items()}
+            outputs = model(**batch)
+            loss = outputs.loss
+            loss.backward()
+
+            optimizer.step()
+            lr_scheduler.step()
+            optimizer.zero_grad()
+            progress_bar.update(1)
+
+
+###########################################################
+# Evaluate Model
+###########################################################
+
+
+def clf(model, dataloader, args):
+    """Predict on test set."""
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    model.eval()
+    predictions = []
+    labels = []
+    logging.info("Evaluating...")
+    for batch in tqdm(dataloader):
+        batch = {k: v.to(device) for k, v in batch.items()}
+        with torch.no_grad():
+            outputs = model(**batch)
+        predictions.extend(outputs.logits.argmax(-1).cpu().numpy().tolist())
+        labels.extend(batch["labels"].cpu().numpy().tolist())
+
+    metrics = {}
+    # calc metrics
+
+    # calc accuracy
+    accuracy = accuracy_score(labels, predictions)
+    metrics["accuracy"] = accuracy
+
+    # calc precision
+    precision = precision_score(labels, predictions)
+    metrics["precision"] = precision
+
+    # calc recall
+    recall = recall_score(labels, predictions)
+    metrics["recall"] = recall
+
+    # calc f1
+    f1 = f1_score(labels, predictions)
+    metrics["f1"] = f1
+
+    return metrics
+
+
+##############################################################
+# Run example
+##############################################################
+
+
+def main():
+    """Main function."""
+    # args
+    parser = argparse.ArgumentParser()
+    # parser.add_argument('--dataset', type=str, default='./codesearchnet_adv')
+    parser.add_argument("--model", default="roberta-base")
+    parser.add_argument("--epochs", type=int, default=5)
+    parser.add_argument("--batch_size", type=int, default=32)
+    parser.add_argument("--learning_rate", type=float, default=2e-5)
+    parser.add_argument("--weight_decay", type=float, default=0.01)
+    parser.add_argument("--num_warmup_steps", type=int, default=0)
+    parser.add_argument("--output_dir", type=str, default="models")
+    parser.add_argument("--seq_len", type=int, default=512, help="maximum sequence length")
+    # parser.add_argument("--distractors", type=int, default=99, help="number of distractors per true pair")
+    parser.add_argument("--log_level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="INFO")
+
+    args = parser.parse_args()
+
"./codesearchnet_adv/train_adv_clf.jsonl" + + TRAIN_FILE = NlCodesearchClfCodesearchnetAdv( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_adv/config.jsonnet")), + "nl_codesearch", + "codesearchnet_adv", + ).get_dataset_raw() + + # logging + logging.basicConfig(level=args.log_level) + + # load tokenizer + logging.info("Loading model...") + tokenizer = AutoTokenizer.from_pretrained(args.model) + + # load data + logging.info("Loading data...") + dataloader = load_data(tokenizer, args.batch_size, args.seq_len, TRAIN_FILE, True) + + model = AutoModelForSequenceClassification.from_pretrained(args.model) + + # train + logging.info("Training...") + train(model, dataloader, args) + + # save model + logging.info("Saving model...") + model.save_pretrained(f"{args.output_dir}/{args.model}") + # also soave tokenizer + tokenizer.save_pretrained(f"{args.output_dir}/{args.model}") + + TEST_FILES = [ + [ + "codesearchnetadv", + NlCodesearchClfCodesearchnetAdv( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_adv/config.jsonnet")), + "nl_codesearch", + "codesearchnet_adv", + ).get_dataset_raw(), + ], + [ + "codesearchnet_ruby", + NlCodesearchClfCodesearchnetRuby( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_ruby/config.jsonnet")), + "nl_codesearch", + "codesearchnet_ruby", + ).get_dataset_raw(), + ], + [ + "codesearchnet_go", + NlCodesearchClfCodesearchnetGo( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_go/config.jsonnet")), + "nl_codesearch", + "codesearchnet_go", + ).get_dataset_raw(), + ], + [ + "codesearchnet_java", + NlCodesearchClfCodesearchnetJava( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_java/config.jsonnet")), + "nl_codesearch", + "codesearchnet_java", + ).get_dataset_raw(), + ], + [ + "codesearchnet_javascript", + NlCodesearchClfCodesearchnetJavascript( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_javascript/config.jsonnet")), + "nl_codesearch", + "codesearchnet_javascript", + ).get_dataset_raw(), + ], + [ + "codesearchnet_php", + NlCodesearchClfCodesearchnetPhp( + TaskConfig.from_jsonnet(jsonnet_path=Path("./codesearchnet_php/config.jsonnet")), + "nl_codesearch", + "codesearchnet_php", + ).get_dataset_raw(), + ], + [ + "cosqa", + NlCodesearchClfCosqa( + TaskConfig.from_jsonnet(jsonnet_path=Path("./cosqa/config.jsonnet")), "nl_codesearch", "cosqa" + ).get_dataset_raw(), + ], + [ + "statcodesearch", + NlCodesearchClfStatcodesearch( + TaskConfig.from_jsonnet(jsonnet_path=Path("./statcodesearch/config.jsonnet")), + "nl_codesearch", + "statcodesearch", + ).get_dataset_raw(), + ], + ] + + results = {} + for file in TEST_FILES: + logging.info(f"Evaluating on {file[0]}...") + dataloader = load_data(tokenizer, args.batch_size, args.seq_len, file[1], False) + metrics = clf(model, dataloader, args) + results[file[0]] = metrics + logging.info(f"Test results for {file[0]}: {metrics}") + + logging.info(f"Test results: {results}") + + +if __name__ == "__main__": + main()