Add GPTQModel support for inferencing GPTQ models #2217

Open · wants to merge 11 commits into main
8 changes: 8 additions & 0 deletions README.md
@@ -327,6 +327,14 @@ lm_eval --model hf \
--tasks hellaswag
```

[GPTQModel](https://github.com/ModelCloud/GPTQModel) quantized models can be loaded by adding `gptqmodel=True` to the `model_args` argument:

```bash
lm_eval --model hf \
--model_args pretrained=model-name-or-path,gptqmodel=True \
--tasks hellaswag
```
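
For reference, the same evaluation can also be driven from the Python API. A minimal sketch (the model id is a placeholder, and `simple_evaluate` takes the same comma-separated `model_args` string as the CLI):

```python
# Minimal sketch of the equivalent call through the Python API
# (assumes lm_eval is installed; "model-name-or-path" is a placeholder).
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=model-name-or-path,gptqmodel=True",
    tasks=["hellaswag"],
)
print(results["results"]["hellaswag"])
```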

We support wildcards in task names; for example, you can run all of the machine-translated lambada tasks via `--tasks lambada_openai_mt_*`.

## Saving Results
57 changes: 41 additions & 16 deletions lm_eval/models/huggingface.py
@@ -87,6 +87,7 @@ def __init__(
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
**kwargs,
) -> None:
super().__init__()
@@ -193,6 +194,7 @@ def __init__(
peft=peft,
delta=delta,
autogptq=autogptq,
gptqmodel=gptqmodel,
**kwargs,
)

@@ -529,6 +531,7 @@ def _create_model(
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
**kwargs,
) -> None:
"""
@@ -556,7 +559,7 @@ def _create_model(
)
)

if not autogptq:
if not autogptq and not gptqmodel:
if model_kwargs.get("load_in_4bit", None):
assert (
transformers.__version__ >= "4.30.0"
@@ -576,23 +579,45 @@
**model_kwargs,
)
else:
try:
from auto_gptq import AutoGPTQForCausalLM
except ModuleNotFoundError:
raise Exception(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",

if autogptq and gptqmodel:
raise ValueError(
"Cannot use both 'autogptq' and 'gptqmodel' options at the same time."
)

self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
model_basename=None if autogptq is True else Path(autogptq).stem,
use_safetensors=True
if autogptq is True
else autogptq.endswith(".safetensors"),
**model_kwargs,
)
if autogptq:
try:
from auto_gptq import AutoGPTQForCausalLM
except ModuleNotFoundError:
raise Exception(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",
)

self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
model_basename=None if autogptq is True else Path(autogptq).stem,
use_safetensors=True
if autogptq is True
else autogptq.endswith(".safetensors"),
**model_kwargs,
)

if gptqmodel:
try:
from gptqmodel import GPTQModel
except ModuleNotFoundError:
raise Exception(
"Tried to load gptqmodel, but gptqmodel is not installed ",
"please install gptqmodel via `pip install gptqmodel --no-build-isolation` or `pip install lm-eval[gptqmodel] --no-build-isolation`",
)

self._model = GPTQModel.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
**model_kwargs
)

if peft and delta:
raise ValueError(
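For context, the load path taken by the new `gptqmodel=True` branch can also be exercised outside the harness. A minimal sketch, assuming only the `GPTQModel.from_quantized` arguments shown in the diff above (the model id is a placeholder):

```python
# Standalone sketch of the load path the gptqmodel branch uses.
# Requires `pip install gptqmodel --no-build-isolation`; the model id is a placeholder.
from gptqmodel import GPTQModel

model = GPTQModel.from_quantized(
    "model-name-or-path",  # a GPTQ-quantized checkpoint directory or hub id
    trust_remote_code=False,
)
print(type(model).__name__)
```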
1 change: 1 addition & 0 deletions pyproject.toml
@@ -75,6 +75,7 @@ testing = ["pytest", "pytest-cov", "pytest-xdist"]
vllm = ["vllm>=0.4.2"]
zeno = ["pandas", "zeno-client"]
wandb = ["wandb>=0.16.3", "pandas", "numpy"]
gptqmodel = ["gptqmodel>=1.0.6"]
all = [
"lm_eval[anthropic]",
"lm_eval[dev]",
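
After installing the new extra, a quick way to confirm the optional dependency resolved is to read its installed version. A minimal sketch (assumes `pip install "lm-eval[gptqmodel]" --no-build-isolation` or `pip install gptqmodel --no-build-isolation` has been run):

```python
# Sanity check that the optional dependency declared in pyproject.toml is available.
import importlib.metadata

print(importlib.metadata.version("gptqmodel"))  # expected to be >= 1.0.6
```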