
Fix maximum seqlen for gptq quantization (huggingface#1748)
fix gptq calibration data
SunMarc authored and young-developer committed May 10, 2024
1 parent: ace6dac · commit: 3c17f0c
Showing 1 changed file with 2 additions and 1 deletion.
optimum/gptq/quantizer.py (2 additions, 1 deletion)
@@ -354,7 +354,8 @@ def quantize_model(self, model: nn.Module, tokenizer: Optional[Any] = None):
         self.use_cuda_fp16 = model.dtype == torch.float16
 
         if self.model_seqlen is None:
-            self.model_seqlen = get_seqlen(model)
+            # We allow a max value of 4028 to avoid passing data with huge length to the model during the calibration step
+            self.model_seqlen = min(4028, get_seqlen(model))
 
         device = get_device(model)
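
For context, here is a minimal sketch (not taken from the commit or the optimum codebase) of why capping model_seqlen bounds the size of the calibration inputs. The tokenizer checkpoint, the sample texts, and the 131072-token advertised context size are assumptions chosen purely for illustration.

```python
# Minimal sketch, not part of the commit: shows how a capped sequence length
# bounds tokenized calibration samples. The checkpoint, texts, and 131072
# context size are illustrative assumptions.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 has no pad token by default

calibration_texts = [
    "GPTQ calibrates each layer on a small batch of example text.",
    "Capping the sequence length keeps calibration inputs a manageable size.",
]

# A model may advertise a very large context window (e.g. 131072 tokens).
# Mirroring the patched line, the calibration length is capped at 4028.
advertised_seqlen = 131072
model_seqlen = min(4028, advertised_seqlen)

batch = tokenizer(
    calibration_texts,
    truncation=True,
    max_length=model_seqlen,  # calibration inputs never exceed the capped length
    padding=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape)  # sequence dimension is at most model_seqlen
```

Without such a cap, a model reporting a very large context window would force every calibration sample to be padded or truncated to that length, inflating memory use during the calibration forward passes.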
