Skip to content

Commit

Permalink
Refactor diffusers tasks (#1947)
Browse files Browse the repository at this point in the history
* refactor diffusers tasks "stable-diffusion" and "stable-diffusion-xl" into "text-to-image", "image-to-image" and "inpainting"

* warn about deprecated tasks

* generalize diffusion export

* fix

* fix

* fix

* clean up

* trocr

* fix

* standardise model/pipeline mapping task lookup

* add latent consistency

* test

* fix

* fix

* final

* refactor

* fix

* fix offline hub support

* remove unnecessary

* misc

* test

* style

* update docs
  • Loading branch information
IlyasMoutawwakil authored Jul 16, 2024
1 parent b865809 commit ab4341b
Show file tree
Hide file tree
Showing 12 changed files with 462 additions and 301 deletions.
2 changes: 1 addition & 1 deletion docs/source/exporters/onnx/usage_guides/export_a_model.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Required arguments:
output Path indicating the directory where to store generated ONNX model.

Optional arguments:
--task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: ['default', 'fill-mask', 'text-generation', 'text2text-generation', 'text-classification', 'token-classification', 'multiple-choice', 'object-detection', 'question-answering', 'image-classification', 'image-segmentation', 'masked-im', 'semantic-segmentation', 'automatic-speech-recognition', 'audio-classification', 'audio-frame-classification', 'automatic-speech-recognition', 'audio-xvector', 'image-to-text', 'stable-diffusion', 'zero-shot-object-detection']. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder.
--task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: ['default', 'fill-mask', 'text-generation', 'text2text-generation', 'text-classification', 'token-classification', 'multiple-choice', 'object-detection', 'question-answering', 'image-classification', 'image-segmentation', 'masked-im', 'semantic-segmentation', 'automatic-speech-recognition', 'audio-classification', 'audio-frame-classification', 'automatic-speech-recognition', 'audio-xvector', 'image-to-text', 'zero-shot-object-detection', 'image-to-image', 'inpainting', 'text-to-image']. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder.
--monolith Force to export the model as a single ONNX file. By default, the ONNX exporter may break the model in several ONNX files, for example for encoder-decoder models where the encoder should be run only once while the decoder is looped over.
--device DEVICE The device to use to do the export. Defaults to "cpu".
--opset OPSET If specified, ONNX opset version to export the model with. Otherwise, the default opset will be used.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ Optional arguments:
the model, but are among: ['default', 'fill-mask', 'text-generation', 'text2text-generation', 'text-classification', 'token-classification',
'multiple-choice', 'object-detection', 'question-answering', 'image-classification', 'image-segmentation', 'masked-im', 'semantic-
segmentation', 'automatic-speech-recognition', 'audio-classification', 'audio-frame-classification', 'automatic-speech-recognition', 'audio-xvector', 'vision2seq-
lm', 'stable-diffusion', 'zero-shot-object-detection']. For decoder models, use `xxx-with-past` to export the model using past key
lm', 'zero-shot-object-detection', 'text-to-image', 'image-to-image', 'inpainting']. For decoder models, use `xxx-with-past` to export the model using past key
values in the decoder.
--atol ATOL If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used.
--pad_token_id PAD_TOKEN_ID
Expand Down
4 changes: 2 additions & 2 deletions optimum/exporters/onnx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"utils": [
"get_decoder_models_for_export",
"get_encoder_decoder_models_for_export",
"get_stable_diffusion_models_for_export",
"get_diffusion_models_for_export",
"MODEL_TYPES_REQUIRING_POSITION_IDS",
],
"__main__": ["main_export"],
Expand All @@ -50,7 +50,7 @@
from .utils import (
get_decoder_models_for_export,
get_encoder_decoder_models_for_export,
get_stable_diffusion_models_for_export,
get_diffusion_models_for_export,
MODEL_TYPES_REQUIRING_POSITION_IDS,
)
from .__main__ import main_export
Expand Down
23 changes: 16 additions & 7 deletions optimum/exporters/onnx/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,24 @@ def main_export(
" and passing it is not required anymore."
)

if task in ["stable-diffusion", "stable-diffusion-xl"]:
logger.warning(
f"The task `{task}` is deprecated and will be removed in a future release of Optimum. "
"Please use one of the following tasks instead: `text-to-image`, `image-to-image`, `inpainting`."
)

original_task = task
task = TasksManager.map_from_synonym(task)

framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
library_name = TasksManager.infer_library_from_model(
model_name_or_path, subfolder=subfolder, library_name=library_name
)
if framework is None:
framework = TasksManager.determine_framework(
model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
)

if library_name is None:
library_name = TasksManager.infer_library_from_model(
model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
)

torch_dtype = None
if framework == "pt":
Expand Down Expand Up @@ -321,9 +332,7 @@ def main_export(
)
model.config.pad_token_id = pad_token_id

if "stable-diffusion" in task:
model_type = "stable-diffusion"
elif hasattr(model.config, "export_model_type"):
if hasattr(model.config, "export_model_type"):
model_type = model.config.export_model_type.replace("_", "-")
else:
model_type = model.config.model_type.replace("_", "-")
Expand Down
17 changes: 9 additions & 8 deletions optimum/exporters/onnx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
from transformers.modeling_utils import PreTrainedModel

if is_diffusers_available():
from diffusers import ModelMixin
from diffusers import DiffusionPipeline, ModelMixin

if is_tf_available():
from transformers.modeling_tf_utils import TFPreTrainedModel
Expand Down Expand Up @@ -264,7 +264,7 @@ def _run_validation(
atol = config.ATOL_FOR_VALIDATION

if "diffusers" in str(reference_model.__class__) and not is_diffusers_available():
raise ImportError("The pip package `diffusers` is required to validate stable diffusion ONNX models.")
raise ImportError("The pip package `diffusers` is required to validate diffusion ONNX models.")

framework = "pt" if is_torch_available() and isinstance(reference_model, nn.Module) else "tf"

Expand Down Expand Up @@ -388,7 +388,7 @@ def _run_validation(
logger.info(f"\t-[✓] ONNX model output names match reference model ({onnx_output_names})")

if "diffusers" in str(reference_model.__class__) and not is_diffusers_available():
raise ImportError("The pip package `diffusers` is required to validate stable diffusion ONNX models.")
raise ImportError("The pip package `diffusers` is required to validate diffusion ONNX models.")

# Check the shape and values match
shape_failures = []
Expand Down Expand Up @@ -854,7 +854,7 @@ def export(
opset = config.DEFAULT_ONNX_OPSET

if "diffusers" in str(model.__class__) and not is_diffusers_available():
raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.")
raise ImportError("The pip package `diffusers` is required to export diffusion models to ONNX.")

if not config.is_transformers_support_available:
import transformers
Expand Down Expand Up @@ -912,7 +912,7 @@ def export(


def onnx_export_from_model(
model: Union["PreTrainedModel", "TFPreTrainedModel"],
model: Union["PreTrainedModel", "TFPreTrainedModel", "DiffusionPipeline"],
output: Union[str, Path],
opset: Optional[int] = None,
optimize: Optional[str] = None,
Expand Down Expand Up @@ -999,15 +999,16 @@ def onnx_export_from_model(
>>> onnx_export_from_model(model, output="gpt2_onnx/")
```
"""
library_name = TasksManager._infer_library_from_model(model)

TasksManager.standardize_model_attributes(model, library_name)
TasksManager.standardize_model_attributes(model)

if hasattr(model.config, "export_model_type"):
model_type = model.config.export_model_type.replace("_", "-")
else:
model_type = model.config.model_type.replace("_", "-")

library_name = TasksManager.infer_library_from_model(model)

custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE

if task is not None:
Expand Down Expand Up @@ -1191,7 +1192,7 @@ def onnx_export_from_model(
optimizer.optimize(save_dir=output, optimization_config=optimization_config, file_suffix="")

# Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any
# TODO: treating stable diffusion separately is quite ugly
# TODO: treating diffusion separately is quite ugly
if not no_post_process and library_name != "diffusers":
try:
logger.info("Post-processing the exported models...")
Expand Down
16 changes: 8 additions & 8 deletions optimum/exporters/onnx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
from ..utils import (
get_decoder_models_for_export as _get_decoder_models_for_export,
)
from ..utils import (
get_diffusion_models_for_export as _get_diffusion_models_for_export,
)
from ..utils import (
get_encoder_decoder_models_for_export as _get_encoder_decoder_models_for_export,
)
Expand All @@ -43,9 +46,6 @@
from ..utils import (
get_speecht5_models_for_export as _get_speecht5_models_for_export,
)
from ..utils import (
get_stable_diffusion_models_for_export as _get_stable_diffusion_models_for_export,
)


logger = logging.get_logger()
Expand All @@ -68,7 +68,7 @@
from transformers.modeling_tf_utils import TFPreTrainedModel

if is_diffusers_available():
from diffusers import ModelMixin, StableDiffusionPipeline
from diffusers import DiffusionPipeline, ModelMixin


MODEL_TYPES_REQUIRING_POSITION_IDS = {
Expand Down Expand Up @@ -219,13 +219,13 @@ def _get_submodels_and_onnx_configs(
DEPRECATION_WARNING_GET_MODEL_FOR_EXPORT = "The usage of `optimum.exporters.onnx.utils.get_{model_type}_models_for_export` is deprecated and will be removed in a future release, please use `optimum.exporters.utils.get_{model_type}_models_for_export` instead."


def get_stable_diffusion_models_for_export(
pipeline: "StableDiffusionPipeline",
def get_diffusion_models_for_export(
pipeline: "DiffusionPipeline",
int_dtype: str = "int64",
float_dtype: str = "fp32",
) -> Dict[str, Tuple[Union["PreTrainedModel", "ModelMixin"], "ExportConfig"]]:
logger.warning(DEPRECATION_WARNING_GET_MODEL_FOR_EXPORT.format(model_type="stable_diffusion"))
return _get_stable_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter="onnx")
logger.warning(DEPRECATION_WARNING_GET_MODEL_FOR_EXPORT.format(model_type="diffusion"))
return _get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter="onnx")


def get_sam_models_for_export(model: Union["PreTrainedModel", "TFPreTrainedModel"], config: "ExportConfig"):
Expand Down
Loading

0 comments on commit ab4341b

Please sign in to comment.