diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 1a84aa3d22..7677ed26d0 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -34,6 +34,7 @@ from diffusers.utils import CONFIG_NAME from huggingface_hub import snapshot_download from transformers import CLIPFeatureExtractor, CLIPTokenizer +from transformers.file_utils import add_end_docstrings import onnxruntime as ort @@ -51,7 +52,7 @@ DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ) -from .modeling_ort import ORTModel +from .modeling_ort import ONNX_MODEL_END_DOCSTRING, ORTModel from .utils import ( _ORT_TO_NP_TYPE, ONNX_WEIGHTS_NAME, @@ -531,18 +532,31 @@ def forward(self, sample: np.ndarray): return outputs +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTStableDiffusionPipeline(StableDiffusionPipelineMixin, ORTStableDiffusionPipelineBase): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline). + """ + pass -class ORTStableDiffusionImg2ImgPipeline(ORTStableDiffusionPipelineBase, StableDiffusionImg2ImgPipelineMixin): - def __call__(self, *args, **kwargs): - return StableDiffusionImg2ImgPipelineMixin.__call__(self, *args, **kwargs) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTStableDiffusionImg2ImgPipeline(StableDiffusionImg2ImgPipelineMixin, ORTStableDiffusionPipelineBase): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusionImg2ImgPipeline). 
+ """ + pass -class ORTStableDiffusionInpaintPipeline(ORTStableDiffusionPipelineBase, StableDiffusionInpaintPipelineMixin): - def __call__(self, *args, **kwargs): - return StableDiffusionInpaintPipelineMixin.__call__(self, *args, **kwargs) + +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTStableDiffusionInpaintPipeline(StableDiffusionInpaintPipelineMixin, ORTStableDiffusionPipelineBase): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline). + """ + + pass class ORTStableDiffusionXLPipelineBase(ORTStableDiffusionPipelineBase): @@ -584,11 +598,19 @@ def __init__( self.watermark = StableDiffusionXLWatermarker() -class ORTStableDiffusionXLPipeline(ORTStableDiffusionXLPipelineBase, StableDiffusionXLPipelineMixin): - def __call__(self, *args, **kwargs): - return StableDiffusionXLPipelineMixin.__call__(self, *args, **kwargs) +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTStableDiffusionXLPipeline(StableDiffusionXLPipelineMixin, ORTStableDiffusionXLPipelineBase): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLPipeline). 
+ """ + pass -class ORTStableDiffusionXLImg2ImgPipeline(ORTStableDiffusionXLPipelineBase, StableDiffusionXLImg2ImgPipelineMixin): - def __call__(self, *args, **kwargs): - return StableDiffusionXLImg2ImgPipelineMixin.__call__(self, *args, **kwargs) + +@add_end_docstrings(ONNX_MODEL_END_DOCSTRING) +class ORTStableDiffusionXLImg2ImgPipeline(StableDiffusionXLImg2ImgPipelineMixin, ORTStableDiffusionXLPipelineBase): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). + """ + + pass diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion.py index 0f5b3c3b33..d26aae66be 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion.py @@ -135,8 +135,8 @@ def _encode_prompt( def check_inputs( self, prompt: Union[str, List[str]], - height: Optional[int], - width: Optional[int], + height: int, + width: int, callback_steps: int, negative_prompt: Optional[str] = None, prompt_embeds: Optional[np.ndarray] = None, @@ -226,10 +226,10 @@ def __call__( prompt (`Optional[Union[str, List[str]]]`, defaults to None): The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. instead. - height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. - width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. + height (`Optional[int]`, defaults to `None`): + The height in pixels of the generated image. If `None`, defaults to `self.unet.config.sample_size * self.vae_scale_factor`. + width (`Optional[int]`, defaults to `None`): + The width in pixels of the generated image. 
If `None`, defaults to `self.unet.config.sample_size * self.vae_scale_factor`. num_inference_steps (`int`, defaults to 50): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. @@ -248,7 +248,7 @@ def __call__( eta (`float`, defaults to 0.0): Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[np.random.RandomState]`, defaults to `None`): A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py index e2a7ac7c9e..9f029b3d69 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_inpaint.py @@ -55,8 +55,8 @@ class StableDiffusionInpaintPipelineMixin(StableDiffusionPipelineMixin): def check_inputs( self, prompt: Union[str, List[str]], - height: Optional[int], - width: Optional[int], + height: int, + width: int, callback_steps: int, negative_prompt: Optional[str] = None, prompt_embeds: Optional[np.ndarray] = None, @@ -133,9 +133,9 @@ def __call__( mask_image (`PIL.Image.Image`): `Image`, or tensor representing a masked image batch which will be upscaled. height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. + The height in pixels of the generated image. If `None`, defaults to `self.unet.config.sample_size * self.vae_scale_factor`. width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. + The width in pixels of the generated image. 
If `None`, defaults to `self.unet.config.sample_size * self.vae_scale_factor`. num_inference_steps (`int`, defaults to 50): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py index 4c8c015fed..a341083c99 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl.py @@ -293,10 +293,10 @@ def __call__( prompt (`Optional[Union[str, List[str]]]`, defaults to None): The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. instead. - height (`Optional[int]`, defaults to None): - The height in pixels of the generated image. - width (`Optional[int]`, defaults to None): - The width in pixels of the generated image. + height (`Optional[int]`, defaults to `None`): + The height in pixels of the generated image. If `None`, defaults to `self.unet.config.sample_size * self.vae_scale_factor`. + width (`Optional[int]`, defaults to `None`): + The width in pixels of the generated image. If `None`, defaults to `self.unet.config.sample_size * self.vae_scale_factor`. num_inference_steps (`int`, defaults to 50): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. @@ -315,7 +315,7 @@ def __call__( eta (`float`, defaults to 0.0): Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[np.random.RandomState]`, defaults to `None`): A np.random.RandomState to make generation deterministic. 
latents (`Optional[np.ndarray]`, defaults to `None`): Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image diff --git a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py index 4a2b48d38e..66171163aa 100644 --- a/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py +++ b/optimum/pipelines/diffusers/pipeline_stable_diffusion_xl_img2img.py @@ -325,7 +325,7 @@ def __call__( eta (`float`, defaults to 0.0): Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to [`schedulers.DDIMScheduler`], will be ignored for others. - generator (`Optional[np.random.RandomState]`, defaults to `None`):: + generator (`Optional[np.random.RandomState]`, defaults to `None`): A np.random.RandomState to make generation deterministic. latents (`Optional[np.ndarray]`, defaults to `None`): Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image