diff --git a/comfy/model_management.py b/comfy/model_management.py
index 5461d28e4fe..74f3dadba87 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -245,6 +245,8 @@ def unload_model():
             n.cpu()
         current_gpu_controlnets = []
 
+def minimum_inference_memory():
+    return (768 * 1024 * 1024)
 
 def load_model_gpu(model):
     global current_loaded_model
@@ -272,7 +274,7 @@ def load_model_gpu(model):
         model_size = model.model_size()
         current_free_mem = get_free_memory(torch_dev)
         lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
-        if model_size > (current_free_mem - (512 * 1024 * 1024)): #only switch to lowvram if really necessary
+        if model_size > (current_free_mem - minimum_inference_memory()): #only switch to lowvram if really necessary
             vram_set_state = VRAMState.LOW_VRAM
 
     current_loaded_model = model
@@ -458,7 +460,7 @@ def is_device_cpu(device):
            return True
    return False
 
-def should_use_fp16(device=None):
+def should_use_fp16(device=None, model_params=0):
    global xpu_available
    global directml_enabled
 
@@ -482,10 +484,27 @@ def should_use_fp16(device=None):
        return True
 
    props = torch.cuda.get_device_properties("cuda")
+    if props.major < 6:
+        return False
+
+    fp16_works = False
+    #FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
+    #when the model doesn't actually fit on the card
+    #TODO: actually test if GP106 and others have the same type of behavior
+    nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"]
+    for x in nvidia_10_series:
+        if x in props.name.lower():
+            fp16_works = True
+
+    if fp16_works:
+        free_model_memory = (get_free_memory() * 0.9 - minimum_inference_memory())
+        if model_params * 4 > free_model_memory:
+            return True
+
    if props.major < 7:
        return False
 
-    #FP32 is faster on those cards?
+    #FP16 is just broken on these cards
    nvidia_16_series = ["1660", "1650", "1630", "T500", "T550", "T600"]
    for x in nvidia_16_series:
        if x in props.name:
diff --git a/comfy/sd.py b/comfy/sd.py
index 3d79c7c04fb..fc3551fea72 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -1122,6 +1122,12 @@ class EmptyClass:
 
    return (ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)
 
+def calculate_parameters(sd, prefix):
+    params = 0
+    for k in sd.keys():
+        if k.startswith(prefix):
+            params += sd[k].nelement()
+    return params
 
 def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None):
    sd = utils.load_torch_file(ckpt_path)
@@ -1132,7 +1138,8 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
    model = None
    clip_target = None
 
-    fp16 = model_management.should_use_fp16()
+    parameters = calculate_parameters(sd, "model.diffusion_model.")
+    fp16 = model_management.should_use_fp16(model_params=parameters)
 
    class WeightsLoader(torch.nn.Module):
        pass
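Not part of the patch: a minimal standalone sketch of the heuristic the model_management.py change encodes, assuming a Pascal-class ("10 series") card where FP16 works but is slower than FP32. The function name and example numbers below are hypothetical, and the 16-series special case is omitted; only the constants (768 MB inference reserve, 0.9 safety factor, 4 bytes per FP32 parameter) come from the diff.

# Hypothetical illustration of the patched heuristic; not ComfyUI code.
def sketch_should_use_fp16(model_params, free_mem_bytes, compute_major=6, fp16_works=True):
    # Same 768 MB of headroom the patch reserves for inference.
    minimum_inference_memory = 768 * 1024 * 1024
    if compute_major < 6:      # pre-Pascal: FP16 not usable at all
        return False
    if fp16_works:
        # Enable FP16 on a 10-series card only when the FP32 weights
        # (4 bytes per parameter) would not fit in the usable free memory.
        free_model_memory = free_mem_bytes * 0.9 - minimum_inference_memory
        if model_params * 4 > free_model_memory:
            return True
    if compute_major < 7:      # Pascal and the model fits in FP32: stay in FP32
        return False
    return True                # newer architectures: default to FP16

# Example: ~2.6B diffusion parameters need ~10 GB in FP32, so on an 8 GB card
# with ~7.5 GB free the sketch opts into FP16 despite the Pascal speed penalty.
print(sketch_should_use_fp16(2_600_000_000, int(7.5 * 1024**3)))  # True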