diff --git a/.github/workflows/pkgci_regression_test.yml b/.github/workflows/pkgci_regression_test.yml index 950bf48a2b35..8b0b4e0189d2 100644 --- a/.github/workflows/pkgci_regression_test.yml +++ b/.github/workflows/pkgci_regression_test.yml @@ -221,7 +221,7 @@ jobs: --goldentime-rocm-clip-ms 18.5 \ --goldentime-rocm-vae-ms 337.0 \ --goldendispatch-rocm-unet 1531 \ - --goldendispatch-rocm-clip 1141 \ + --goldendispatch-rocm-clip 1139 \ --goldendispatch-rocm-vae 246 \ --goldensize-rocm-unet-bytes 2280000 \ --goldensize-rocm-clip-bytes 860000 \ @@ -243,7 +243,7 @@ jobs: --goldentime-rocm-clip-ms 15.5 \ --goldentime-rocm-vae-ms 80.0 \ --goldendispatch-rocm-unet 1531 \ - --goldendispatch-rocm-clip 1141 \ + --goldendispatch-rocm-clip 1139 \ --goldendispatch-rocm-vae 246 \ --goldensize-rocm-unet-bytes 2270000 \ --goldensize-rocm-clip-bytes 860000 \ diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp index 94c78b9f0464..0766e0347461 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp @@ -95,6 +95,8 @@ void buildGlobalOptimizationPassPipeline( // Preprocessing passes to get the program into a canonical state. FunctionLikeNest(mainPassManager) + .addPredicatedPass(transformOptions.options.stripAssertions, + IREE::Util::createStripDebugOpsPass) .addPass(IREE::Util::createOptimizeIntArithmeticPass) .addPass(createLinalgQuantizedConvToConvPass) .addPass(createLinalgQuantizedMatmulToMatmulPass) @@ -217,16 +219,10 @@ void buildGlobalOptimizationPassPipeline( FunctionLikeNest(mainPassManager) .addPass(IREE::Flow::createCanonicalizerPass) - .addPass(mlir::createCSEPass); - - FunctionLikeNest(mainPassManager) + .addPass(mlir::createCSEPass) // After running const-eval to a fixed point and folding unit extent dims, // try any new raising opportunities. - .addPass(createRaiseSpecialOpsPass) - // Strip std.assert & co after we perform optimizations; prior to this we - // may use the assertions to derive information during analysis. - .addPredicatedPass(transformOptions.options.stripAssertions, - IREE::Util::createStripDebugOpsPass); + .addPass(createRaiseSpecialOpsPass); // Export after const-eval. If the user wants to keep the input constants // as is in the final parameter archive, they will probably want to disable diff --git a/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py b/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py index 4e5c48a82a90..416f57bc3955 100644 --- a/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py +++ b/experimental/regression_suite/shark-test-suite-models/sdxl/test_clip.py @@ -63,6 +63,7 @@ "--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false", "--iree-llvmcpu-distribution-size=32", "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", "--iree-llvmcpu-enable-ukernels=all", "--iree-global-opt-enable-quantized-matmul-reassociation", ] @@ -92,6 +93,7 @@ def SDXL_CLIP_COMMON_RUN_FLAGS( f"--iree-hip-target={rocm_chip}", "--iree-input-type=torch", "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", "--iree-global-opt-propagate-transposes=true", "--iree-opt-outer-dim-concat=true", "--iree-hip-waves-per-eu=2", diff --git a/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py b/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py index 990b35fc1688..415f710281cb 100644 --- a/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py +++ b/experimental/regression_suite/shark-test-suite-models/sdxl/test_unet.py @@ -131,6 +131,7 @@ "--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false", "--iree-llvmcpu-distribution-size=32", "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", "--iree-llvmcpu-enable-ukernels=all", "--iree-global-opt-enable-quantized-matmul-reassociation", ] @@ -194,6 +195,7 @@ def SDXL_PUNET_INT8_FP8_OUT( "--iree-hal-target-backends=rocm", f"--iree-hip-target={rocm_chip}", "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", "--iree-global-opt-propagate-transposes=true", "--iree-dispatch-creation-enable-fuse-horizontal-contractions=true", "--iree-dispatch-creation-enable-aggressive-fusion=true", diff --git a/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py b/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py index 1267f628727a..42eb58f5d934 100644 --- a/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py +++ b/experimental/regression_suite/shark-test-suite-models/sdxl/test_vae.py @@ -43,6 +43,7 @@ "--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false", "--iree-llvmcpu-distribution-size=32", "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", "--iree-llvmcpu-enable-ukernels=all", "--iree-global-opt-enable-quantized-matmul-reassociation", ] @@ -63,6 +64,7 @@ def SDXL_VAE_COMMON_RUN_FLAGS( "--iree-hal-target-backends=rocm", f"--iree-hip-target={rocm_chip}", "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", "--iree-global-opt-propagate-transposes=true", "--iree-opt-outer-dim-concat=true", "--iree-llvmgpu-enable-prefetch=true",