diff --git a/llmfoundry/utils/builders.py b/llmfoundry/utils/builders.py index b4d4143006..e6355536c7 100644 --- a/llmfoundry/utils/builders.py +++ b/llmfoundry/utils/builders.py @@ -498,10 +498,15 @@ def _validate_cfg(icl_cfg: DictConfig): icl_cfg.batch_size = default_batch_size if 'pass_at_k' not in icl_cfg: icl_cfg.pass_at_k = 1 - if 'num_beams' not in icl_cfg: - icl_cfg.num_beams = 20 if 'fewshot_random_seed' not in icl_cfg: icl_cfg.fewshot_random_seed = 1234 + if 'generations_per_sample' not in icl_cfg: + icl_cfg.generations_per_sample = 1 + + if 'num_beams' in icl_cfg: + raise ValueError( + 'num_beams is no longer supported as a top level icl_task parameter. ' + \ + 'Please use generation_kwargs.num_beams instead.') for icl_cfg in icl_tasks_list: assert isinstance(icl_cfg, DictConfig) @@ -547,7 +552,7 @@ def _validate_cfg(icl_cfg: DictConfig): destination_path=destination_path, fewshot_random_seed=icl_cfg.fewshot_random_seed, pass_at_k=icl_cfg.pass_at_k, - generations_per_sample=icl_cfg.num_beams, + generations_per_sample=icl_cfg.generations_per_sample, has_categories=icl_cfg.get('has_categories', False), cot_delimiter=icl_cfg.get('cot_delimiter', ''), generation_kwargs=icl_cfg.get('generation_kwargs', {}), diff --git a/scripts/eval/yamls/coding_tasks.yaml b/scripts/eval/yamls/coding_tasks.yaml index 48131a0eae..78f2a213bc 100644 --- a/scripts/eval/yamls/coding_tasks.yaml +++ b/scripts/eval/yamls/coding_tasks.yaml @@ -4,7 +4,7 @@ icl_tasks: dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -12,7 +12,7 @@ icl_tasks: dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -20,7 +20,7 @@ icl_tasks: dataset_uri: 
eval/local_data/programming/processed_human_eval_js.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -28,7 +28,7 @@ icl_tasks: dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -36,7 +36,7 @@ icl_tasks: dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -44,7 +44,7 @@ icl_tasks: dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -52,7 +52,7 @@ icl_tasks: dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation - @@ -60,6 +60,6 @@ icl_tasks: dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # ADD YOUR OWN DATASET URI num_fewshot: [0] pass_at_k: 1 - num_beams: 5 + generations_per_sample: 5 batch_size: 1 icl_task_type: code_evaluation diff --git a/scripts/eval/yamls/tasks.yaml b/scripts/eval/yamls/tasks.yaml index 89f5fdef38..75d4c02d0e 100644 --- a/scripts/eval/yamls/tasks.yaml +++ b/scripts/eval/yamls/tasks.yaml @@ -178,7 +178,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/human_eval.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -186,7 +186,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# 
generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -194,7 +194,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -202,7 +202,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -210,7 +210,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -218,7 +218,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -226,7 +226,7 @@ icl_tasks: # dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation # - @@ -234,6 +234,6 @@ icl_tasks: # dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # num_fewshot: [0] # pass_at_k: 1 -# num_beams: 20 +# generations_per_sample: 20 # batch_size: 1 # icl_task_type: code_evaluation