From b55b06fe3ed33659372ccddb9d19133766350ba4 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Sun, 13 Oct 2024 10:29:14 +0200 Subject: [PATCH] add io binding tests on cpu --- tests/onnxruntime/test_diffusion.py | 166 +++++++++++++++------------- 1 file changed, 89 insertions(+), 77 deletions(-) diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py index 8f3f910f8a..eea7e98ac2 100644 --- a/tests/onnxruntime/test_diffusion.py +++ b/tests/onnxruntime/test_diffusion.py @@ -138,13 +138,15 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - for output_type in ["latent", "np", "pt"]: - inputs["output_type"] = output_type + for use_io_binding in [False, True]: + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + ort_pipeline.use_io_binding = use_io_binding - ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4, rtol=1e-2) + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers @@ -280,14 +282,16 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): height, width, batch_size = 32, 64, 2 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - pipeline = self.ORTMODEL_CLASS.from_pretrained( - self.onnx_model_dirs[test_name], provider=provider, use_io_binding=True - ) + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) self.assertEqual(pipeline.device.type, "cuda") - outputs = pipeline(**inputs).images - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) + for use_io_binding in [False, True]: + pipeline.use_io_binding = use_io_binding + + images = pipeline(**inputs).images + + self.assertIsInstance(images, np.ndarray) + self.assertEqual(images.shape, (batch_size, height, width, 3)) @parameterized.expand(["stable-diffusion", "latent-consistency"]) @require_diffusers @@ -446,16 +450,18 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): height, width, batch_size = 128, 128, 1 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - for output_type in ["latent", "np", "pt"]: - inputs["output_type"] = output_type + for use_io_binding in [False, True]: + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + ort_pipeline.use_io_binding = use_io_binding - ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4, rtol=1e-2) + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers @@ -476,32 +482,6 @@ def test_image_reproducibility(self, model_arch: str): self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) np.testing.assert_allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-4, rtol=1e-2) - @parameterized.expand( - grid_parameters( - { - "model_arch": SUPPORTED_ARCHITECTURES, - "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], - } - ) - ) - @require_torch_gpu - @require_diffusers - def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): - model_args = {"test_name": test_name, "model_arch": model_arch} - self._setup(model_args) - - height, width, batch_size = 32, 64, 2 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - - pipeline = self.ORTMODEL_CLASS.from_pretrained( - self.onnx_model_dirs[test_name], provider=provider, use_io_binding=True - ) - self.assertEqual(pipeline.device.type, "cuda") - - outputs = pipeline(**inputs).images - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - @parameterized.expand(["stable-diffusion", "latent-consistency"]) @require_diffusers def test_safety_checker(self, model_arch: str): @@ -536,6 +516,34 @@ def test_safety_checker(self, model_arch: str): np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + @parameterized.expand( + grid_parameters( + { + "model_arch": SUPPORTED_ARCHITECTURES, + "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], + } + ) + ) + @require_torch_gpu + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 2 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + self.assertEqual(pipeline.device.type, "cuda") + + for use_io_binding in [False, True]: + pipeline.use_io_binding = use_io_binding + + images = pipeline(**inputs).images + + self.assertIsInstance(images, np.ndarray) + self.assertEqual(images.shape, (batch_size, height, width, 3)) + class ORTPipelineForInpaintingTest(ORTModelTestMixin): SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl"] @@ -662,19 +670,21 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) + height, width, batch_size = 128, 128, 1 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - height, width, batch_size = 64, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - - for output_type in ["latent", "np", "pt"]: - inputs["output_type"] = output_type + for use_io_binding in [False, True]: + for output_type in ["latent", "np", "pt"]: + inputs["output_type"] = output_type + ort_pipeline.use_io_binding = use_io_binding - ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4, rtol=1e-2) + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers @@ -695,32 +705,6 @@ def test_image_reproducibility(self, model_arch: str): self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) np.testing.assert_allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-4, rtol=1e-2) - @parameterized.expand( - grid_parameters( - { - "model_arch": SUPPORTED_ARCHITECTURES, - "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], - } - ) - ) - @require_torch_gpu - @require_diffusers - def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): - model_args = {"test_name": test_name, "model_arch": model_arch} - self._setup(model_args) - - height, width, batch_size = 32, 64, 2 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) - - pipeline = self.ORTMODEL_CLASS.from_pretrained( - self.onnx_model_dirs[test_name], provider=provider, use_io_binding=True - ) - self.assertEqual(pipeline.device.type, "cuda") - - outputs = pipeline(**inputs).images - self.assertIsInstance(outputs, np.ndarray) - self.assertEqual(outputs.shape, (batch_size, height, width, 3)) - @parameterized.expand(["stable-diffusion"]) @require_diffusers def test_safety_checker(self, model_arch: str): @@ -753,3 +737,31 @@ def test_safety_checker(self, model_arch: str): ort_images = ort_output.images diffusers_images = diffusers_output.images np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) + + @parameterized.expand( + grid_parameters( + { + "model_arch": SUPPORTED_ARCHITECTURES, + "provider": ["CUDAExecutionProvider", "ROCMExecutionProvider", "TensorrtExecutionProvider"], + } + ) + ) + @require_torch_gpu + @require_diffusers + def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): + model_args = {"test_name": test_name, "model_arch": model_arch} + self._setup(model_args) + + height, width, batch_size = 32, 64, 2 + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + + pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + self.assertEqual(pipeline.device.type, "cuda") + + for use_io_binding in [False, True]: + pipeline.use_io_binding = use_io_binding + + images = pipeline(**inputs).images + + self.assertIsInstance(images, np.ndarray) + self.assertEqual(images.shape, (batch_size, height, width, 3))