Add GPTQModel support for inferencing GPTQ models #2217

Open · wants to merge 11 commits into main
8 changes: 8 additions & 0 deletions README.md
@@ -327,6 +327,14 @@ lm_eval --model hf \
--tasks hellaswag
```

[GPTQModel](https://github.com/ModelCloud/GPTQModel) quantized models can be loaded by adding `gptqmodel=True` to the `model_args` argument:

```bash
lm_eval --model hf \
--model_args pretrained=model-name-or-path,gptqmodel=True \
--tasks hellaswag
```
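
For reference, the same evaluation can also be driven from the Python API. A minimal sketch (the model id is a placeholder, and `simple_evaluate` takes the same comma-separated `model_args` string as the CLI):

```python
# Minimal sketch of the equivalent call through the Python API
# (assumes lm_eval is installed; "model-name-or-path" is a placeholder).
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=model-name-or-path,gptqmodel=True",
    tasks=["hellaswag"],
)
print(results["results"]["hellaswag"])
```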

We support wildcards in task names; for example, you can run all of the machine-translated lambada tasks via `--tasks lambada_openai_mt_*`.

## Saving Results
57 changes: 41 additions & 16 deletions lm_eval/models/huggingface.py
@@ -87,6 +87,7 @@ def __init__(
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
**kwargs,
) -> None:
super().__init__()
@@ -193,6 +194,7 @@ def __init__(
peft=peft,
delta=delta,
autogptq=autogptq,
gptqmodel=gptqmodel,
**kwargs,
)

@@ -529,6 +531,7 @@ def _create_model(
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
**kwargs,
) -> None:
"""
@@ -556,7 +559,7 @@ def _create_model(
)
)

if not autogptq:
if not autogptq and not gptqmodel:
if model_kwargs.get("load_in_4bit", None):
assert (
transformers.__version__ >= "4.30.0"
@@ -576,23 +579,45 @@
**model_kwargs,
)
else:
try:
from auto_gptq import AutoGPTQForCausalLM
except ModuleNotFoundError:
raise Exception(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",

if autogptq and gptqmodel:
raise ValueError(
"Cannot use both 'autogptq' and 'gptqmodel' options at the same time."
)

self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
model_basename=None if autogptq is True else Path(autogptq).stem,
use_safetensors=True
if autogptq is True
else autogptq.endswith(".safetensors"),
**model_kwargs,
)
if autogptq:
try:
from auto_gptq import AutoGPTQForCausalLM
except ModuleNotFoundError:
raise Exception(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",
)

self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
model_basename=None if autogptq is True else Path(autogptq).stem,
use_safetensors=True
if autogptq is True
else autogptq.endswith(".safetensors"),
**model_kwargs,
)

if gptqmodel:
try:
from gptqmodel import GPTQModel
except ModuleNotFoundError:
raise Exception(
"Tried to load gptqmodel, but gptqmodel is not installed ",
"please install gptqmodel via `pip install gptqmodel --no-build-isolation` or `pip install lm-eval[gptqmodel] --no-build-isolation`",
)

self._model = GPTQModel.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
**model_kwargs
)

if peft and delta:
raise ValueError(
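For context, the load path taken by the new `gptqmodel=True` branch can also be exercised outside the harness. A minimal sketch, assuming only the `GPTQModel.from_quantized` arguments shown in the diff above (the model id is a placeholder):

```python
# Standalone sketch of the load path the gptqmodel branch uses.
# Requires `pip install gptqmodel --no-build-isolation`; the model id is a placeholder.
from gptqmodel import GPTQModel

model = GPTQModel.from_quantized(
    "model-name-or-path",  # a GPTQ-quantized checkpoint directory or hub id
    trust_remote_code=False,
)
print(type(model).__name__)
```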
1 change: 1 addition & 0 deletions pyproject.toml
@@ -75,6 +75,7 @@ testing = ["pytest", "pytest-cov", "pytest-xdist"]
vllm = ["vllm>=0.4.2"]
zeno = ["pandas", "zeno-client"]
wandb = ["wandb>=0.16.3", "pandas", "numpy"]
gptqmodel = ["gptqmodel>=1.0.6"]
all = [
"lm_eval[anthropic]",
"lm_eval[dev]",
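
After installing the new extra, a quick way to confirm the optional dependency resolved is to read its installed version. A minimal sketch (assumes `pip install "lm-eval[gptqmodel]" --no-build-isolation` or `pip install gptqmodel --no-build-isolation` has been run):

```python
# Sanity check that the optional dependency declared in pyproject.toml is available.
import importlib.metadata

print(importlib.metadata.version("gptqmodel"))  # expected to be >= 1.0.6
```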