diff --git a/llmfoundry/models/hf/model_wrapper.py b/llmfoundry/models/hf/model_wrapper.py
index 3a618dd7e5..f387562b2f 100644
--- a/llmfoundry/models/hf/model_wrapper.py
+++ b/llmfoundry/models/hf/model_wrapper.py
@@ -60,7 +60,8 @@ def __init__(self,
             self.model.forward).args
         # inspect.getfullargspec HuggingFace quantized model could not return args correctly
         if not self.model_forward_args:
-            self.model_forward_args = inspect.signature(self.model.forward).parameters.keys()
+            self.model_forward_args = inspect.signature(
+                self.model.forward).parameters.keys()
 
         # Note: We need to add the FSDP related attributes to the model AFTER the super init,
         # so that the (possible) embedding resizing doesn't destroy them
diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py
index 8bae5af0e6..6808e0ee0b 100644
--- a/scripts/eval/eval.py
+++ b/scripts/eval/eval.py
@@ -122,8 +122,9 @@ def evaluate_model(model_cfg: DictConfig, dist_timeout: Union[float, int],
                                                    model_gauntlet)  # type: ignore
 
     if fsdp_config and model_cfg.model.load_in_8bit:
-        raise ValueError("The FSDP config block is not supported when loading " +
-                         "Hugging Face models in 8bit.")
+        raise ValueError(
+            'The FSDP config block is not supported when loading ' +
+            'Hugging Face models in 8bit.')
 
     if hasattr(model_cfg.model, 'pretrained_lora_id_or_path'):
         composer_model = load_peft_model(model_cfg.model, tokenizer,
diff --git a/scripts/eval/yamls/hf_8bit_eval.yaml b/scripts/eval/yamls/hf_8bit_eval.yaml
index 3acced4845..4f195cdc54 100644
--- a/scripts/eval/yamls/hf_8bit_eval.yaml
+++ b/scripts/eval/yamls/hf_8bit_eval.yaml
@@ -22,4 +22,4 @@
 device_eval_batch_size: 4
 
 # With load_in_8bit, do not specify fsdp_config
-icl_tasks: 'eval/yamls/tasks_light.yaml'
\ No newline at end of file
+icl_tasks: 'eval/yamls/tasks_light.yaml'
diff --git a/scripts/train/train.py b/scripts/train/train.py
index e62da22cbb..c3a65c08c3 100644
--- a/scripts/train/train.py
+++ b/scripts/train/train.py
@@ -90,9 +90,11 @@ def validate_config(cfg: DictConfig):
             'Setting `torch._dynamo.config.suppress_errors = True` and falling back to eager.'
         )
         torch._dynamo.config.suppress_errors = True  # type: ignore
-
+
     if cfg.model.get('load_in_8bit', False):
-        raise ValueError("`load_in_8bit` is only supported for evaluation rather than training.")
+        raise ValueError(
+            '`load_in_8bit` is only supported for evaluation rather than training.'
+        )
 
 
 def build_composer_model(model_cfg: DictConfig,
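
For the model_wrapper.py hunk above, a minimal self-contained sketch (not from this PR) of why the fallback from inspect.getfullargspec to inspect.signature is needed. The wrapped forward here, built with functools.wraps, is an illustrative assumption standing in for a quantized model whose forward has been replaced by a *args/**kwargs wrapper: getfullargspec inspects the wrapper itself and returns an empty .args list, while signature() follows __wrapped__ and still recovers the real parameter names.

# Sketch only: `forward` and `patched_forward` are hypothetical stand-ins,
# not code from llm-foundry or Hugging Face.
import functools
import inspect


def forward(input_ids, attention_mask=None):
    return input_ids


@functools.wraps(forward)  # some quantization/dispatch hooks wrap forward like this
def patched_forward(*args, **kwargs):
    return forward(*args, **kwargs)


# getfullargspec looks at the wrapper itself, whose positional args are empty
print(inspect.getfullargspec(patched_forward).args)  # []

# signature() unwraps __wrapped__ and recovers the usable argument names
print(list(inspect.signature(patched_forward).parameters.keys()))
# ['input_ids', 'attention_mask']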