From 928e8bb6f50d1e93ef5d0bcaa81f8c5fd9a6f4d8 Mon Sep 17 00:00:00 2001
From: Baber Abbasi <92168766+baberabb@users.noreply.github.com>
Date: Fri, 30 Aug 2024 06:40:41 +0500
Subject: [PATCH] hotfix #2262 (#2264)

* max_length - 1 (generation always >= 1)

* vllm: fix rolling prefix_token

* nit: add comment

* fixup! max_length should be handled for logliklihoods

* Revert "fixup! max_length should be handled for logliklihoods"

This reverts commit 432d1a3b754c117c3a54ea2fe792ab3a1bd09ed3.

---
 lm_eval/models/api_models.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py
index 25223e906a..9d21705512 100644
--- a/lm_eval/models/api_models.py
+++ b/lm_eval/models/api_models.py
@@ -104,8 +104,9 @@ def __init__(
         self._truncate = truncate
         self._max_gen_toks = int(max_gen_toks)
         self._seed = int(seed)
-        eval_logger.info(f"Using max length {max_length}")
-        self.max_length = max_length
+        # max_length - 1 as we always have 1 token for generation
+        eval_logger.info(f"Using max length {max_length} - 1")
+        self.max_length = max_length - 1
         if int(num_concurrent) <= 1:
             eval_logger.info(
                 "Concurrent requests are disabled. To enable concurrent requests, set `num_concurrent` > 1."
@@ -419,9 +420,9 @@ def batch_logliklehood_requests(
         for chunk in chunks:
             for cache_key, context_enc, continuation_enc in chunk:
                 # max_length - 1 as we always have 1 token for generation
-                inp = (context_enc + continuation_enc)[-(self.max_length - 1) :]
+                inp = (context_enc + continuation_enc)[-(self.max_length) :]
                 ctxlen = len(context_enc) - max(
                     0, len(context_enc) + len(continuation_enc) - (self.max_length - 1)
+                    0, len(context_enc) + len(continuation_enc) - (self.max_length)
                 )
 
                 inputs.append(inp)
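
Illustration of the arithmetic this hotfix restores: since __init__ now stores
self.max_length = max_length - 1 (one token is always reserved for generation),
the loglikelihood truncation must slice with self.max_length directly, or the
token would be subtracted twice. The sketch below is not the harness's code;
the function name and the dummy token lists are hypothetical, while
context_enc, continuation_enc, and max_length mirror the names in the diff.

    # minimal sketch, assuming max_length is already the reduced value
    # (model context window minus the one token reserved for generation)
    def truncate_loglikelihood_input(context_enc, continuation_enc, max_length):
        # keep only the last max_length tokens of context + continuation;
        # no further "- 1" here, which is what the hotfix removes
        inp = (context_enc + continuation_enc)[-max_length:]
        # tokens cut from the front are charged against the context side
        ctxlen = len(context_enc) - max(
            0, len(context_enc) + len(continuation_enc) - max_length
        )
        return inp, ctxlen

    if __name__ == "__main__":
        model_window = 8                 # hypothetical model context size
        max_length = model_window - 1    # value stored by __init__ after the fix
        context = list(range(6))         # 6 dummy context token ids
        continuation = [100, 101, 102]   # 3 dummy continuation token ids
        inp, ctxlen = truncate_loglikelihood_input(context, continuation, max_length)
        assert len(inp) == 7             # 9 tokens truncated to max_length = 7
        assert ctxlen == 4               # 2 context tokens dropped from the front
        print(inp, ctxlen)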