From 06dc480b521ecfdce89d30a290bb1e92ea220c10 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Mon, 19 Aug 2024 12:43:27 +0100 Subject: [PATCH 01/25] !!! --- devito/ir/xdsl_iet/cluster_to_ssa.py | 30 ++++++++++++---------------- devito/xdsl_core/xdsl_cpu.py | 17 +++++++++++++--- tests/test_xdsl_base.py | 3 ++- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/devito/ir/xdsl_iet/cluster_to_ssa.py b/devito/ir/xdsl_iet/cluster_to_ssa.py index 37713b5322..48d2489d12 100644 --- a/devito/ir/xdsl_iet/cluster_to_ssa.py +++ b/devito/ir/xdsl_iet/cluster_to_ssa.py @@ -287,7 +287,7 @@ def _visit_math_nodes(self, dim: SteppingDimension, node: Expr, SSAargs = (self._visit_math_nodes(dim, arg, output_indexed) for arg in node.args) return reduce(lambda x, y : arith.AndI(x, y).result, SSAargs) - + # Trigonometric functions elif isinstance(node, sin): assert len(node.args) == 1, "Expected single argument for sin." @@ -298,13 +298,13 @@ def _visit_math_nodes(self, dim: SteppingDimension, node: Expr, assert len(node.args) == 1, "Expected single argument for cos." return math.CosOp(self._visit_math_nodes(dim, node.args[0], output_indexed)).result - + elif isinstance(node, tan): assert len(node.args) == 1, "Expected single argument for TanOp." 
- + return math.TanOp(self._visit_math_nodes(dim, node.args[0], output_indexed)).result - + elif isinstance(node, Relational): if isinstance(node, GreaterThan): mnemonic = "sge" @@ -391,12 +391,10 @@ def build_stencil_step(self, dim: SteppingDimension, eq: LoweredEq) -> None: apply.res[0], self.function_values[self.out_time_buffer], stencil.StencilBoundsAttr(zip(lb, ub)), - stencil.TempType(len(shape), - element_type=dtype_to_xdsltype(write_function.dtype)) ) - - store.temp_with_halo.name_hint = f"{write_function.name}_t{self.out_time_buffer[1]}_temp" # noqa - self.temps[self.out_time_buffer] = store.temp_with_halo + load = stencil.LoadOp.get(self.function_values[self.out_time_buffer]) + load.res.name_hint = f"{write_function.name}_t{self.out_time_buffer[1]}_temp" # noqa + self.temps[self.out_time_buffer] = load.res def build_generic_step_expression(self, dim: SteppingDimension, eq: LoweredEq): # Sources @@ -439,7 +437,6 @@ def build_condition(self, dim: SteppingDimension, eq: BooleanFunction): self.build_generic_step_expression(dim, eq) scf.Yield() - def build_time_loop( self, eqs: list[Any], step_dim: SteppingDimension, **kwargs ): @@ -450,7 +447,7 @@ def build_time_loop( ub = iet_ssa.LoadSymbolic.get( step_dim.symbolic_max._C_name, IndexType() ) - + one = arith.Constant.from_int_and_width(1, IndexType()) # Devito iterates from time_m to time_M *inclusive*, MLIR only takes @@ -497,7 +494,7 @@ def build_time_loop( for i, (f, t) in enumerate(self.time_buffers) } self.function_values |= self.block_args - + # Name the block argument for debugging for (f, t), arg in self.block_args.items(): arg.name_hint = f"{f.name}_t{t}" @@ -513,8 +510,7 @@ def build_time_loop( def lower_devito_Eqs(self, eqs: list[Any], **kwargs): # Lower devito Equations to xDSL - - + for eq in eqs: lowered = self.operator._lower_exprs(as_tuple(eq), **kwargs) if isinstance(eq, Eq): @@ -546,7 +542,7 @@ def _lower_injection(self, eqs: list[LoweredEq]): lb = 
arith.Constant.from_int_and_width(int(lower), IndexType()) else: raise NotImplementedError(f"Lower bound of type {type(lower)} not supported") - + try: name = interval.dim.symbolic_min.name except: @@ -633,7 +629,7 @@ def convert(self, eqs: Iterable[Eq], **kwargs) -> ModuleOp: # Instantiate the module. self.function_values: dict[tuple[Function, int], SSAValue] = {} self.symbol_values: dict[str, SSAValue] = {} - + module = ModuleOp(Region([block := Block([])])) with ImplicitBuilder(block): # Get all functions used in the equations @@ -647,7 +643,7 @@ def convert(self, eqs: Iterable[Eq], **kwargs) -> ModuleOp: functions.add(f.function) elif isinstance(eq, Injection): - + functions.add(eq.field.function) for f in retrieve_functions(eq.expr): if isinstance(f, PointSource): diff --git a/devito/xdsl_core/xdsl_cpu.py b/devito/xdsl_core/xdsl_cpu.py index cd86fc0d9c..e56d6a9a5d 100644 --- a/devito/xdsl_core/xdsl_cpu.py +++ b/devito/xdsl_core/xdsl_cpu.py @@ -460,6 +460,7 @@ def _jit_compile(self): # Run the first pipeline, mostly xDSL-centric xdsl_args = [source_name, "--allow-unregistered-dialect", + "--disable-verify", "-p", xdsl_pipeline[1:-1],] # We use the Python API to run xDSL rather than a subprocess @@ -597,7 +598,10 @@ def generate_MLIR_OPENMP_PIPELINE(kwargs): def generate_XDSL_CPU_PIPELINE(nb_tiled_dims): passes = [ - "stencil-shape-inference", + "canonicalize", + "cse", + "shape-inference", + "stencil-bufferize", "convert-stencil-to-ll-mlir", f"scf-parallel-loop-tiling{{{generate_tiling_arg(nb_tiled_dims)}}}", "printf-to-llvm", @@ -609,7 +613,10 @@ def generate_XDSL_CPU_PIPELINE(nb_tiled_dims): def generate_XDSL_CPU_noop_PIPELINE(): passes = [ - "stencil-shape-inference", + "canonicalize", + "cse", + "shape-inference", + "stencil-bufferize", "convert-stencil-to-ll-mlir", "printf-to-llvm" ] @@ -619,11 +626,15 @@ def generate_XDSL_CPU_noop_PIPELINE(): def generate_XDSL_MPI_PIPELINE(decomp, nb_tiled_dims): passes = [ + "canonicalize", + "cse", 
f"distribute-stencil{decomp}", + "shape-inference", "canonicalize-dmp", + "stencil-bufferize", + "dmp-to-mpi{mpi_init=false}", "convert-stencil-to-ll-mlir", f"scf-parallel-loop-tiling{{{generate_tiling_arg(nb_tiled_dims)}}}", - "dmp-to-mpi{mpi_init=false}", "lower-mpi", "printf-to-llvm", "canonicalize" diff --git a/tests/test_xdsl_base.py b/tests/test_xdsl_base.py index 39aa97828b..72ddfcb16a 100644 --- a/tests/test_xdsl_base.py +++ b/tests/test_xdsl_base.py @@ -73,7 +73,8 @@ def test_xdsl_III(): assert isinstance(scffor_ops[0], LoadOp) assert isinstance(scffor_ops[1], ApplyOp) assert isinstance(scffor_ops[2], StoreOp) - assert isinstance(scffor_ops[3], Yield) + assert isinstance(scffor_ops[3], LoadOp) + assert isinstance(scffor_ops[4], Yield) assert type(ops[7] == Call) assert type(ops[8] == StoreOp) From 0ab86cab8d117ca72c9b7a65dd704d816fb72acb Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Mon, 19 Aug 2024 12:45:36 +0100 Subject: [PATCH 02/25] (CI hack) use new temp xDSL commits with latest tweaks. 
--- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 94f7821666..1979a22cc6 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@f8bb935880276cf077e0a80f1905105d0a98eb33 + pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index b1000d71d5..0193daf44b 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@f8bb935880276cf077e0a80f1905105d0a98eb33 + pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index e8c086a5b7..7ebcfe6fa2 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@f8bb935880276cf077e0a80f1905105d0a98eb33 + pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index aa36a701a5..0967be3216 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 
+36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@f8bb935880276cf077e0a80f1905105d0a98eb33 + pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 1b8df3226e..7b5245ff37 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@f8bb935880276cf077e0a80f1905105d0a98eb33 + pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 - name: Test no-MPI, no-Openmp run: | From dc2ee6583a1dc9677ece60ed6369d7f739224e03 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 13:43:26 +0100 Subject: [PATCH 03/25] Try with hopefully fixed extract_strided_metadata. --- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 1979a22cc6..3423c29f2c 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 + pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index 0193daf44b..35f42f8822 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: 
| pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 + pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index 7ebcfe6fa2..bb5d518367 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 + pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index 0967be3216..1df134168a 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 + pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 7b5245ff37..b010fdcc37 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@540da57a7bcc5f05d3b98da0ea1f88420a3dbdf0 + pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 - name: Test no-MPI, no-Openmp run: | From 4e6452a2f65ffe267a9d859d837e96c546323b4f Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 14:08:19 +0100 Subject: [PATCH 04/25] Post module __init__ fix. 
--- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- tests/filecheck/.lit_test_times.txt | 9 +++++++++ 6 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 tests/filecheck/.lit_test_times.txt diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 3423c29f2c..83a3852517 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 + pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index 35f42f8822..5a8cacaf51 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 + pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index bb5d518367..3abe00aabc 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 + pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index 
1df134168a..976c3fc72d 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 + pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index b010fdcc37..606de0956d 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@0a2420cdf0f337f8237ec142171d5c8f046daf19 + pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 - name: Test no-MPI, no-Openmp run: | diff --git a/tests/filecheck/.lit_test_times.txt b/tests/filecheck/.lit_test_times.txt new file mode 100644 index 0000000000..4d1f2a8c89 --- /dev/null +++ b/tests/filecheck/.lit_test_times.txt @@ -0,0 +1,9 @@ +-8.160377e-02 shape_inference.mlir +8.171344e-02 version.mlir +-1.473970e-01 xdsl_mpi_pipeline.mlir +-1.490667e-01 xdsl_mpi_pipeline_b.mlir +-1.464252e-01 xdsl_mpi_pipeline_c.mlir +-1.516540e-01 xdsl_mpi_pipeline_d.mlir +-1.704619e-01 xdsl_mpi_pipeline_e.mlir +-1.000817e-01 xdsl_pipeline.mlir +-1.596556e-01 xdsl_pipeline_openmp.mlir From a0cddd70b9a5efda293e0b8376677af65f083f00 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 16:31:33 +0100 Subject: [PATCH 05/25] More bump. 
--- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 83a3852517..8d88f522e6 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 + pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index 5a8cacaf51..9058572c27 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 + pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index 3abe00aabc..b208df760a 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 + pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index 976c3fc72d..d5ca62ecdf 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 
+36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 + pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 606de0956d..bde4de750f 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@306c5e00e3f715ae6710467bb1179e9fb3f394c8 + pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 - name: Test no-MPI, no-Openmp run: | From 53095385016b8255d6b2a953eed9a640df6fd51a Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 16:56:41 +0100 Subject: [PATCH 06/25] Temporarily disable verify on noop too. --- devito/xdsl_core/xdsl_cpu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/devito/xdsl_core/xdsl_cpu.py b/devito/xdsl_core/xdsl_cpu.py index e56d6a9a5d..7fb22e4e21 100644 --- a/devito/xdsl_core/xdsl_cpu.py +++ b/devito/xdsl_core/xdsl_cpu.py @@ -184,6 +184,7 @@ def _jit_compile(self): xdsl_args = [source_name, "--allow-unregistered-dialect", "-p", + "--disable-verify", xdsl_pipeline[1:-1],] # We use the Python API to run xDSL rather than a subprocess # This avoids reimport overhead From 2c8dfaec1c06b090b36b8b6490e1ce78c0b6168d Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 17:09:13 +0100 Subject: [PATCH 07/25] Another bump on the verifier relaxation. 
--- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- devito/xdsl_core/xdsl_cpu.py | 2 -- 6 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 8d88f522e6..a187172c3e 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 + pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index 9058572c27..7cd7d7cbb1 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 + pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index b208df760a..6dd145604e 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 + pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index d5ca62ecdf..39bc907225 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ 
b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 + pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index bde4de750f..796882d73e 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@f6fdfb86d20f39f2d0e2e3f76900a1013bc5ef79 + pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 - name: Test no-MPI, no-Openmp run: | diff --git a/devito/xdsl_core/xdsl_cpu.py b/devito/xdsl_core/xdsl_cpu.py index 7fb22e4e21..f9c4e66769 100644 --- a/devito/xdsl_core/xdsl_cpu.py +++ b/devito/xdsl_core/xdsl_cpu.py @@ -184,7 +184,6 @@ def _jit_compile(self): xdsl_args = [source_name, "--allow-unregistered-dialect", "-p", - "--disable-verify", xdsl_pipeline[1:-1],] # We use the Python API to run xDSL rather than a subprocess # This avoids reimport overhead @@ -461,7 +460,6 @@ def _jit_compile(self): # Run the first pipeline, mostly xDSL-centric xdsl_args = [source_name, "--allow-unregistered-dialect", - "--disable-verify", "-p", xdsl_pipeline[1:-1],] # We use the Python API to run xDSL rather than a subprocess From 11e9885849c15591ae0d96f65f8f4f33b1263ada Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 17:13:49 +0100 Subject: [PATCH 08/25] Syntax update. 
--- devito/xdsl_core/xdsl_gpu.py | 2 +- tests/filecheck/.lit_test_times.txt | 18 +++++++++--------- tests/filecheck/shape_inference.mlir | 6 +++--- tests/filecheck/xdsl_mpi_pipeline.mlir | 4 ++-- tests/filecheck/xdsl_mpi_pipeline_b.mlir | 4 ++-- tests/filecheck/xdsl_mpi_pipeline_c.mlir | 2 +- tests/filecheck/xdsl_mpi_pipeline_d.mlir | 2 +- tests/filecheck/xdsl_mpi_pipeline_e.mlir | 2 +- tests/filecheck/xdsl_pipeline.mlir | 4 ++-- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/devito/xdsl_core/xdsl_gpu.py b/devito/xdsl_core/xdsl_gpu.py index 41bd08e084..9f281015b3 100644 --- a/devito/xdsl_core/xdsl_gpu.py +++ b/devito/xdsl_core/xdsl_gpu.py @@ -140,7 +140,7 @@ def _jit_compile(self): def generate_XDSL_GPU_PIPELINE(): passes = [ - "stencil-shape-inference", + "shape-inference", "convert-stencil-to-ll-mlir", "reconcile-unrealized-casts", "printf-to-llvm", diff --git a/tests/filecheck/.lit_test_times.txt b/tests/filecheck/.lit_test_times.txt index 4d1f2a8c89..84be361be3 100644 --- a/tests/filecheck/.lit_test_times.txt +++ b/tests/filecheck/.lit_test_times.txt @@ -1,9 +1,9 @@ --8.160377e-02 shape_inference.mlir -8.171344e-02 version.mlir --1.473970e-01 xdsl_mpi_pipeline.mlir --1.490667e-01 xdsl_mpi_pipeline_b.mlir --1.464252e-01 xdsl_mpi_pipeline_c.mlir --1.516540e-01 xdsl_mpi_pipeline_d.mlir --1.704619e-01 xdsl_mpi_pipeline_e.mlir --1.000817e-01 xdsl_pipeline.mlir --1.596556e-01 xdsl_pipeline_openmp.mlir +1.627717e-01 shape_inference.mlir +9.103966e-02 version.mlir +-1.920681e-01 xdsl_mpi_pipeline.mlir +-1.922677e-01 xdsl_mpi_pipeline_b.mlir +-1.886339e-01 xdsl_mpi_pipeline_c.mlir +-2.020643e-01 xdsl_mpi_pipeline_d.mlir +-2.121959e-01 xdsl_mpi_pipeline_e.mlir +-2.791779e-01 xdsl_pipeline.mlir +-1.644087e-01 xdsl_pipeline_openmp.mlir diff --git a/tests/filecheck/shape_inference.mlir b/tests/filecheck/shape_inference.mlir index 77194b9944..764541d970 100644 --- a/tests/filecheck/shape_inference.mlir +++ b/tests/filecheck/shape_inference.mlir @@ -1,4 +1,4 
@@ -// RUN: xdsl-opt -p stencil-shape-inference %s | filecheck %s +// RUN: xdsl-opt -p shape-inference %s | filecheck %s builtin.module { func.func @Kernel(%f2_vec0 : !stencil.field<[-2,5]x[-2,5]xf32>, %f2_vec1 : !stencil.field<[-2,5]x[-2,5]xf32>, %timers : !llvm.ptr) { @@ -64,7 +64,7 @@ builtin.module { %47 = arith.mulf %46, %dt_1 : f32 stencil.return %47 : f32 } - %f2_t1_temp_1 = stencil.store %f2_t1_temp to %f2_t1 ([0, 0] : [3, 3]) : !stencil.temp to !stencil.field<[-2,5]x[-2,5]xf32> with_halo : !stencil.temp + stencil.store %f2_t1_temp to %f2_t1(<[0, 0], [3, 3]>) : !stencil.temp to !stencil.field<[-2,5]x[-2,5]xf32> scf.yield %f2_t1, %f2_t0 : !stencil.field<[-2,5]x[-2,5]xf32>, !stencil.field<[-2,5]x[-2,5]xf32> } %5 = func.call @timer_end(%0) : (f64) -> f64 @@ -139,7 +139,7 @@ builtin.module { // CHECK-NEXT: %47 = arith.mulf %46, %dt_1 : f32 // CHECK-NEXT: stencil.return %47 : f32 // CHECK-NEXT: } -// CHECK-NEXT: %f2_t1_temp_1 = stencil.store %f2_t1_temp to %f2_t1 ([0, 0] : [3, 3]) : !stencil.temp<[0,3]x[0,3]xf32> to !stencil.field<[-2,5]x[-2,5]xf32> with_halo : !stencil.temp +// CHECK-NEXT: stencil.store %f2_t1_temp to %f2_t1(<[0, 0], [3, 3]>) : !stencil.temp<[0,3]x[0,3]xf32> to !stencil.field<[-2,5]x[-2,5]xf32> // CHECK-NEXT: scf.yield %f2_t1, %f2_t0 : !stencil.field<[-2,5]x[-2,5]xf32>, !stencil.field<[-2,5]x[-2,5]xf32> // CHECK-NEXT: } // CHECK-NEXT: %5 = func.call @timer_end(%0) : (f64) -> f64 diff --git a/tests/filecheck/xdsl_mpi_pipeline.mlir b/tests/filecheck/xdsl_mpi_pipeline.mlir index c651e5a984..d4c986a0b1 100644 --- a/tests/filecheck/xdsl_mpi_pipeline.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline.mlir @@ -94,7 +94,7 @@ builtin.module { %73 = arith.mulf %7, %72 : f32 stencil.return %73 : f32 } - %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> with_halo : !stencil.temp + stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp to 
!stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> } %6 = func.call @timer_end(%0) : (f64) -> f64 @@ -202,7 +202,7 @@ builtin.module { // CHECK-NEXT: %73 = arith.mulf %7, %72 : f32 // CHECK-NEXT: stencil.return %73 : f32 // CHECK-NEXT: } -// CHECK-NEXT: %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> with_halo : !stencil.temp +// CHECK-NEXT: stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: } // CHECK-NEXT: %6 = func.call @timer_end(%0) : (f64) -> f64 diff --git a/tests/filecheck/xdsl_mpi_pipeline_b.mlir b/tests/filecheck/xdsl_mpi_pipeline_b.mlir index 035c313aa2..b27625d132 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_b.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_b.mlir @@ -94,7 +94,7 @@ builtin.module { %73 = arith.mulf %7, %72 : f32 stencil.return %73 : f32 } - %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> with_halo : !stencil.temp + stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> } %6 = func.call @timer_end(%0) : (f64) -> f64 @@ -201,7 +201,7 @@ builtin.module { // CHECK-NEXT: %73 = arith.mulf %7, %72 : f32 // CHECK-NEXT: 
stencil.return %73 : f32 // CHECK-NEXT: } -// CHECK-NEXT: %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> with_halo : !stencil.temp +// CHECK-NEXT: stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: } // CHECK-NEXT: %6 = func.call @timer_end(%0) : (f64) -> f64 diff --git a/tests/filecheck/xdsl_mpi_pipeline_c.mlir b/tests/filecheck/xdsl_mpi_pipeline_c.mlir index d4eca46f97..93ba65c468 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_c.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_c.mlir @@ -94,7 +94,7 @@ builtin.module { %73 = arith.mulf %7, %72 : f32 stencil.return %73 : f32 } - %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> with_halo : !stencil.temp + stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> } %6 = func.call @timer_end(%0) : (f64) -> f64 diff --git a/tests/filecheck/xdsl_mpi_pipeline_d.mlir b/tests/filecheck/xdsl_mpi_pipeline_d.mlir index 08f3f92b2f..b02f1d4baf 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_d.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_d.mlir @@ -94,7 +94,7 @@ builtin.module { %73 = arith.mulf %7, %72 : f32 stencil.return %73 : f32 } - %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> 
with_halo : !stencil.temp + stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> } %6 = func.call @timer_end(%0) : (f64) -> f64 diff --git a/tests/filecheck/xdsl_mpi_pipeline_e.mlir b/tests/filecheck/xdsl_mpi_pipeline_e.mlir index 82793b8062..3c230e1125 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_e.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_e.mlir @@ -94,7 +94,7 @@ builtin.module { %73 = arith.mulf %7, %72 : f32 stencil.return %73 : f32 } - %u_t1_temp_1 = stencil.store %u_t1_temp to %u_t1 ([0, 0, 0] : [51, 101, 101]) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> with_halo : !stencil.temp + stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> } %6 = func.call @timer_end(%0) : (f64) -> f64 diff --git a/tests/filecheck/xdsl_pipeline.mlir b/tests/filecheck/xdsl_pipeline.mlir index 5243a3ca7c..726f57e097 100644 --- a/tests/filecheck/xdsl_pipeline.mlir +++ b/tests/filecheck/xdsl_pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p stencil-shape-inference,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,0},printf-to-llvm,canonicalize %s | filecheck %s +// RUN: xdsl-opt -p shape-inference,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,0},printf-to-llvm,canonicalize %s | filecheck %s builtin.module { func.func @Kernel(%f2_vec0 : !stencil.field<[-2,5]x[-2,5]xf32>, %f2_vec1 : !stencil.field<[-2,5]x[-2,5]xf32>, %timers : !llvm.ptr) { @@ -64,7 +64,7 @@ builtin.module { %47 = arith.mulf %46, %dt_1 : f32 
stencil.return %47 : f32 } - %f2_t1_temp_1 = stencil.store %f2_t1_temp to %f2_t1 ([0, 0] : [3, 3]) : !stencil.temp to !stencil.field<[-2,5]x[-2,5]xf32> with_halo : !stencil.temp + stencil.store %f2_t1_temp to %f2_t1(<[0, 0], [3, 3]>) : !stencil.temp to !stencil.field<[-2,5]x[-2,5]xf32> scf.yield %f2_t1, %f2_t0 : !stencil.field<[-2,5]x[-2,5]xf32>, !stencil.field<[-2,5]x[-2,5]xf32> } %5 = func.call @timer_end(%0) : (f64) -> f64 From 5c15ce99459e596b7092a91ebd570181cfc6a502 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 17:21:30 +0100 Subject: [PATCH 09/25] Filecheck pipelines update. --- tests/filecheck/.lit_test_times.txt | 18 +++++++++--------- tests/filecheck/xdsl_mpi_pipeline.mlir | 2 +- tests/filecheck/xdsl_mpi_pipeline_b.mlir | 2 +- tests/filecheck/xdsl_mpi_pipeline_c.mlir | 2 +- tests/filecheck/xdsl_mpi_pipeline_d.mlir | 2 +- tests/filecheck/xdsl_mpi_pipeline_e.mlir | 2 +- tests/filecheck/xdsl_pipeline.mlir | 2 +- tests/filecheck/xdsl_pipeline_openmp.mlir | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/filecheck/.lit_test_times.txt b/tests/filecheck/.lit_test_times.txt index 84be361be3..b1051222fe 100644 --- a/tests/filecheck/.lit_test_times.txt +++ b/tests/filecheck/.lit_test_times.txt @@ -1,9 +1,9 @@ -1.627717e-01 shape_inference.mlir -9.103966e-02 version.mlir --1.920681e-01 xdsl_mpi_pipeline.mlir --1.922677e-01 xdsl_mpi_pipeline_b.mlir --1.886339e-01 xdsl_mpi_pipeline_c.mlir --2.020643e-01 xdsl_mpi_pipeline_d.mlir --2.121959e-01 xdsl_mpi_pipeline_e.mlir --2.791779e-01 xdsl_pipeline.mlir --1.644087e-01 xdsl_pipeline_openmp.mlir +1.647806e-01 shape_inference.mlir +8.123279e-02 version.mlir +-2.095106e-01 xdsl_mpi_pipeline.mlir +-2.172942e-01 xdsl_mpi_pipeline_b.mlir +-7.517524e-01 xdsl_mpi_pipeline_c.mlir +-7.795422e-01 xdsl_mpi_pipeline_d.mlir +-8.045800e-01 xdsl_mpi_pipeline_e.mlir +-2.641258e-01 xdsl_pipeline.mlir +-1.632233e-01 xdsl_pipeline_openmp.mlir diff --git a/tests/filecheck/xdsl_mpi_pipeline.mlir 
b/tests/filecheck/xdsl_mpi_pipeline.mlir index d4c986a0b1..8cb334cdf3 100644 --- a/tests/filecheck/xdsl_mpi_pipeline.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false}" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { diff --git a/tests/filecheck/xdsl_mpi_pipeline_b.mlir b/tests/filecheck/xdsl_mpi_pipeline_b.mlir index b27625d132..684e714266 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_b.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_b.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},canonicalize-dmp" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { diff --git a/tests/filecheck/xdsl_mpi_pipeline_c.mlir b/tests/filecheck/xdsl_mpi_pipeline_c.mlir index 93ba65c468..e94a7d2504 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_c.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_c.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},canonicalize-dmp,convert-stencil-to-ll-mlir" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 
restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { diff --git a/tests/filecheck/xdsl_mpi_pipeline_d.mlir b/tests/filecheck/xdsl_mpi_pipeline_d.mlir index b02f1d4baf..24460a858d 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_d.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_d.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},canonicalize-dmp,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0}" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0}" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { diff --git a/tests/filecheck/xdsl_mpi_pipeline_e.mlir b/tests/filecheck/xdsl_mpi_pipeline_e.mlir index 3c230e1125..14e82658d0 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_e.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_e.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},canonicalize-dmp,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0},dmp-to-mpi{mpi_init=false},lower-mpi" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 
restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0},dmp-to-mpi{mpi_init=false},lower-mpi" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { diff --git a/tests/filecheck/xdsl_pipeline.mlir b/tests/filecheck/xdsl_pipeline.mlir index 726f57e097..f2981a3a17 100644 --- a/tests/filecheck/xdsl_pipeline.mlir +++ b/tests/filecheck/xdsl_pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p shape-inference,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,0},printf-to-llvm,canonicalize %s | filecheck %s +// RUN: xdsl-opt -p canonicalize,cse,shape-inference,stencil-bufferize,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,0},printf-to-llvm,canonicalize %s | filecheck %s builtin.module { func.func @Kernel(%f2_vec0 : !stencil.field<[-2,5]x[-2,5]xf32>, %f2_vec1 : !stencil.field<[-2,5]x[-2,5]xf32>, %timers : !llvm.ptr) { diff --git a/tests/filecheck/xdsl_pipeline_openmp.mlir b/tests/filecheck/xdsl_pipeline_openmp.mlir index 85005e4770..127763e598 100644 --- a/tests/filecheck/xdsl_pipeline_openmp.mlir +++ b/tests/filecheck/xdsl_pipeline_openmp.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "canonicalize" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse" %s | filecheck %s builtin.module { func.func @xDSLDiffusionOperator(%u_vec0 : memref<158x158x158xf32>, %u_vec1 : memref<158x158x158xf32>, %timers : !llvm.ptr) { From 6bbe2371f7a0dc460402f3d9f1dcc267dea9f3f5 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 17:30:28 +0100 Subject: [PATCH 10/25] Filecheck updates. 
--- tests/filecheck/.lit_test_times.txt | 18 +- tests/filecheck/xdsl_mpi_pipeline.mlir | 141 ++-- tests/filecheck/xdsl_mpi_pipeline_b.mlir | 121 +-- tests/filecheck/xdsl_mpi_pipeline_c.mlir | 425 ++++++++--- tests/filecheck/xdsl_mpi_pipeline_d.mlir | 444 +++++++---- tests/filecheck/xdsl_mpi_pipeline_e.mlir | 871 +++++++++------------- tests/filecheck/xdsl_pipeline.mlir | 131 ++-- tests/filecheck/xdsl_pipeline_openmp.mlir | 272 +++---- 8 files changed, 1224 insertions(+), 1199 deletions(-) diff --git a/tests/filecheck/.lit_test_times.txt b/tests/filecheck/.lit_test_times.txt index b1051222fe..5b5bf8f863 100644 --- a/tests/filecheck/.lit_test_times.txt +++ b/tests/filecheck/.lit_test_times.txt @@ -1,9 +1,9 @@ -1.647806e-01 shape_inference.mlir -8.123279e-02 version.mlir --2.095106e-01 xdsl_mpi_pipeline.mlir --2.172942e-01 xdsl_mpi_pipeline_b.mlir --7.517524e-01 xdsl_mpi_pipeline_c.mlir --7.795422e-01 xdsl_mpi_pipeline_d.mlir --8.045800e-01 xdsl_mpi_pipeline_e.mlir --2.641258e-01 xdsl_pipeline.mlir --1.632233e-01 xdsl_pipeline_openmp.mlir +1.568606e-01 shape_inference.mlir +8.218265e-02 version.mlir +2.304106e-01 xdsl_mpi_pipeline.mlir +2.372591e-01 xdsl_mpi_pipeline_b.mlir +7.415709e-01 xdsl_mpi_pipeline_c.mlir +7.533531e-01 xdsl_mpi_pipeline_d.mlir +7.714086e-01 xdsl_mpi_pipeline_e.mlir +2.611284e-01 xdsl_pipeline.mlir +1.698225e-01 xdsl_pipeline_openmp.mlir diff --git a/tests/filecheck/xdsl_mpi_pipeline.mlir b/tests/filecheck/xdsl_mpi_pipeline.mlir index 8cb334cdf3..8d62ae8d60 100644 --- a/tests/filecheck/xdsl_mpi_pipeline.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize,cse" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : 
!stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { @@ -111,104 +111,63 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 // CHECK-NEXT: %time_m = arith.constant 1 : index // CHECK-NEXT: %time_M = arith.constant 20 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index -// CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3, %4, %5 = scf.for %time = %time_m to %2 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (!stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>) { +// CHECK-NEXT: %1 = arith.addi %time_M, %time_m : index +// CHECK-NEXT: %2, %3, %4 = scf.for %time = %time_m to %1 step %time_m iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (!stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>) { // CHECK-NEXT: %u_t0_temp = stencil.load %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> -> !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: "dmp.swap"(%u_t0_temp) {"topo" = #dmp.topo<2x1x1>, "swaps" = [#dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange]} : (!stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>) -> () +// CHECK-NEXT: %5 = "dmp.swap"(%u_t0_temp) {"strategy" = #dmp.grid_slice_3d<#dmp.topo<2x1x1>, false>, "swaps" = [#dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange]} : (!stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>) -> !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> // CHECK-NEXT: %u_t2_temp = stencil.load %u_t2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> -> !stencil.temp<[0,51]x[0,101]x[0,101]xf32> -// CHECK-NEXT: "dmp.swap"(%u_t2_temp) {"topo" 
= #dmp.topo<2x1x1>, "swaps" = []} : (!stencil.temp<[0,51]x[0,101]x[0,101]xf32>) -> () -// CHECK-NEXT: %u_t1_temp = stencil.apply(%u_t0_blk = %u_t0_temp : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>, %u_t2_blk = %u_t2_temp : !stencil.temp<[0,51]x[0,101]x[0,101]xf32>) -> (!stencil.temp<[0,51]x[0,101]x[0,101]xf32>) { +// CHECK-NEXT: %6 = "dmp.swap"(%u_t2_temp) {"strategy" = #dmp.grid_slice_3d<#dmp.topo<2x1x1>, false>, "swaps" = []} : (!stencil.temp<[0,51]x[0,101]x[0,101]xf32>) -> !stencil.temp<[0,51]x[0,101]x[0,101]xf32> +// CHECK-NEXT: %u_t1_temp = stencil.apply(%u_t0_blk = %5 : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>, %u_t2_blk = %6 : !stencil.temp<[0,51]x[0,101]x[0,101]xf32>) -> (!stencil.temp<[0,51]x[0,101]x[0,101]xf32>) { // CHECK-NEXT: %dt = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %6 = arith.constant 2 : i64 -// CHECK-NEXT: %7 = "math.fpowi"(%dt, %6) : (f32, i64) -> f32 -// CHECK-NEXT: %8 = arith.constant -1 : i64 -// CHECK-NEXT: %dt_1 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %9 = arith.constant -2 : i64 -// CHECK-NEXT: %10 = "math.fpowi"(%dt_1, %9) : (f32, i64) -> f32 -// CHECK-NEXT: %11 = stencil.access %u_t2_blk[0, 0, 0] : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> -// CHECK-NEXT: %12 = arith.mulf %10, %11 : f32 -// CHECK-NEXT: %13 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %dt_2 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %14 = arith.constant -2 : i64 -// CHECK-NEXT: %15 = "math.fpowi"(%dt_2, %14) : (f32, i64) -> f32 -// CHECK-NEXT: %16 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %17 = arith.mulf %13, %15 : f32 -// CHECK-NEXT: %18 = arith.mulf %17, %16 : f32 -// CHECK-NEXT: %19 = arith.addf %12, %18 : f32 -// CHECK-NEXT: %20 = arith.sitofp %8 : i64 to f32 -// CHECK-NEXT: %21 = arith.mulf %20, %19 : f32 +// CHECK-NEXT: %7 = arith.constant 2 : i64 +// CHECK-NEXT: %8 = "math.fpowi"(%dt, %7) : (f32, i64) -> f32 +// CHECK-NEXT: %9 = arith.constant -1 : i64 +// 
CHECK-NEXT: %10 = arith.constant -2 : i64 +// CHECK-NEXT: %11 = "math.fpowi"(%dt, %10) : (f32, i64) -> f32 +// CHECK-NEXT: %12 = stencil.access %u_t2_blk[0, 0, 0] : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> +// CHECK-NEXT: %13 = arith.mulf %11, %12 : f32 +// CHECK-NEXT: %14 = arith.constant -2.000000e+00 : f32 +// CHECK-NEXT: %15 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %16 = arith.mulf %14, %11 : f32 +// CHECK-NEXT: %17 = arith.mulf %16, %15 : f32 +// CHECK-NEXT: %18 = arith.addf %13, %17 : f32 +// CHECK-NEXT: %19 = arith.sitofp %9 : i64 to f32 +// CHECK-NEXT: %20 = arith.mulf %19, %18 : f32 // CHECK-NEXT: %h_x = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %22 = arith.constant -2 : i64 -// CHECK-NEXT: %23 = "math.fpowi"(%h_x, %22) : (f32, i64) -> f32 -// CHECK-NEXT: %24 = stencil.access %u_t0_blk[-1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %25 = arith.mulf %23, %24 : f32 -// CHECK-NEXT: %h_x_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %26 = arith.constant -2 : i64 -// CHECK-NEXT: %27 = "math.fpowi"(%h_x_1, %26) : (f32, i64) -> f32 -// CHECK-NEXT: %28 = stencil.access %u_t0_blk[1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %29 = arith.mulf %27, %28 : f32 -// CHECK-NEXT: %30 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %31 = arith.constant -2 : i64 -// CHECK-NEXT: %32 = "math.fpowi"(%h_x_2, %31) : (f32, i64) -> f32 -// CHECK-NEXT: %33 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %34 = arith.mulf %30, %32 : f32 -// CHECK-NEXT: %35 = arith.mulf %34, %33 : f32 -// CHECK-NEXT: %36 = arith.addf %25, %29 : f32 -// CHECK-NEXT: %37 = arith.addf %36, %35 : f32 -// CHECK-NEXT: %h_y = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %38 = arith.constant -2 : i64 -// CHECK-NEXT: %39 = "math.fpowi"(%h_y, %38) : (f32, i64) -> f32 -// 
CHECK-NEXT: %40 = stencil.access %u_t0_blk[0, -1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %41 = arith.mulf %39, %40 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %42 = arith.constant -2 : i64 -// CHECK-NEXT: %43 = "math.fpowi"(%h_y_1, %42) : (f32, i64) -> f32 -// CHECK-NEXT: %44 = stencil.access %u_t0_blk[0, 1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %45 = arith.mulf %43, %44 : f32 -// CHECK-NEXT: %46 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %47 = arith.constant -2 : i64 -// CHECK-NEXT: %48 = "math.fpowi"(%h_y_2, %47) : (f32, i64) -> f32 -// CHECK-NEXT: %49 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %50 = arith.mulf %46, %48 : f32 -// CHECK-NEXT: %51 = arith.mulf %50, %49 : f32 -// CHECK-NEXT: %52 = arith.addf %41, %45 : f32 -// CHECK-NEXT: %53 = arith.addf %52, %51 : f32 -// CHECK-NEXT: %h_z = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %54 = arith.constant -2 : i64 -// CHECK-NEXT: %55 = "math.fpowi"(%h_z, %54) : (f32, i64) -> f32 -// CHECK-NEXT: %56 = stencil.access %u_t0_blk[0, 0, -1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %57 = arith.mulf %55, %56 : f32 -// CHECK-NEXT: %h_z_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %58 = arith.constant -2 : i64 -// CHECK-NEXT: %59 = "math.fpowi"(%h_z_1, %58) : (f32, i64) -> f32 -// CHECK-NEXT: %60 = stencil.access %u_t0_blk[0, 0, 1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %61 = arith.mulf %59, %60 : f32 -// CHECK-NEXT: %62 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_z_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %63 = arith.constant -2 : i64 -// CHECK-NEXT: %64 = "math.fpowi"(%h_z_2, %63) : (f32, i64) -> f32 -// CHECK-NEXT: %65 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %66 = 
arith.mulf %62, %64 : f32 -// CHECK-NEXT: %67 = arith.mulf %66, %65 : f32 -// CHECK-NEXT: %68 = arith.addf %57, %61 : f32 -// CHECK-NEXT: %69 = arith.addf %68, %67 : f32 -// CHECK-NEXT: %70 = arith.addf %21, %37 : f32 -// CHECK-NEXT: %71 = arith.addf %70, %53 : f32 -// CHECK-NEXT: %72 = arith.addf %71, %69 : f32 -// CHECK-NEXT: %73 = arith.mulf %7, %72 : f32 -// CHECK-NEXT: stencil.return %73 : f32 +// CHECK-NEXT: %21 = "math.fpowi"(%h_x, %10) : (f32, i64) -> f32 +// CHECK-NEXT: %22 = stencil.access %u_t0_blk[-1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %23 = arith.mulf %21, %22 : f32 +// CHECK-NEXT: %24 = stencil.access %u_t0_blk[1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %25 = arith.mulf %21, %24 : f32 +// CHECK-NEXT: %26 = arith.mulf %14, %21 : f32 +// CHECK-NEXT: %27 = arith.mulf %26, %15 : f32 +// CHECK-NEXT: %28 = arith.addf %23, %25 : f32 +// CHECK-NEXT: %29 = arith.addf %28, %27 : f32 +// CHECK-NEXT: %30 = stencil.access %u_t0_blk[0, -1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %31 = arith.mulf %21, %30 : f32 +// CHECK-NEXT: %32 = stencil.access %u_t0_blk[0, 1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %33 = arith.mulf %21, %32 : f32 +// CHECK-NEXT: %34 = arith.addf %31, %33 : f32 +// CHECK-NEXT: %35 = arith.addf %34, %27 : f32 +// CHECK-NEXT: %36 = stencil.access %u_t0_blk[0, 0, -1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %37 = arith.mulf %21, %36 : f32 +// CHECK-NEXT: %38 = stencil.access %u_t0_blk[0, 0, 1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %39 = arith.mulf %21, %38 : f32 +// CHECK-NEXT: %40 = arith.addf %37, %39 : f32 +// CHECK-NEXT: %41 = arith.addf %40, %27 : f32 +// CHECK-NEXT: %42 = arith.addf %20, %29 : f32 +// CHECK-NEXT: %43 = arith.addf %42, %35 : f32 +// CHECK-NEXT: %44 = arith.addf %43, %41 : f32 +// CHECK-NEXT: %45 = arith.mulf %8, %44 : f32 +// CHECK-NEXT: stencil.return %45 : f32 // 
CHECK-NEXT: } -// CHECK-NEXT: stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> +// CHECK-NEXT: stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: } -// CHECK-NEXT: %6 = func.call @timer_end(%0) : (f64) -> f64 -// CHECK-NEXT: "llvm.store"(%6, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () +// CHECK-NEXT: %7 = func.call @timer_end(%0) : (f64) -> f64 +// CHECK-NEXT: "llvm.store"(%7, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () // CHECK-NEXT: func.return // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 // CHECK-NEXT: func.func private @timer_end(f64) -> f64 -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: } diff --git a/tests/filecheck/xdsl_mpi_pipeline_b.mlir b/tests/filecheck/xdsl_mpi_pipeline_b.mlir index 684e714266..aa4351c7c9 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_b.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_b.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,canonicalize,cse" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { @@ -111,97 +111,56 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 
// CHECK-NEXT: %time_m = arith.constant 1 : index // CHECK-NEXT: %time_M = arith.constant 20 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index -// CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3, %4, %5 = scf.for %time = %time_m to %2 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (!stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>) { +// CHECK-NEXT: %1 = arith.addi %time_M, %time_m : index +// CHECK-NEXT: %2, %3, %4 = scf.for %time = %time_m to %1 step %time_m iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (!stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>) { // CHECK-NEXT: %u_t0_temp = stencil.load %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> -> !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: "dmp.swap"(%u_t0_temp) {"topo" = #dmp.topo<2x1x1>, "swaps" = [#dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange]} : (!stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>) -> () +// CHECK-NEXT: %5 = "dmp.swap"(%u_t0_temp) {"strategy" = #dmp.grid_slice_3d<#dmp.topo<2x1x1>, false>, "swaps" = [#dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange]} : (!stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>) -> !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> // CHECK-NEXT: %u_t2_temp = stencil.load %u_t2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> -> !stencil.temp<[0,51]x[0,101]x[0,101]xf32> -// CHECK-NEXT: %u_t1_temp = stencil.apply(%u_t0_blk = %u_t0_temp : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>, %u_t2_blk = %u_t2_temp : !stencil.temp<[0,51]x[0,101]x[0,101]xf32>) -> (!stencil.temp<[0,51]x[0,101]x[0,101]xf32>) { +// CHECK-NEXT: %u_t1_temp = stencil.apply(%u_t0_blk = %5 : 
!stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32>, %u_t2_blk = %u_t2_temp : !stencil.temp<[0,51]x[0,101]x[0,101]xf32>) -> (!stencil.temp<[0,51]x[0,101]x[0,101]xf32>) { // CHECK-NEXT: %dt = arith.constant 1.000000e-04 : f32 // CHECK-NEXT: %6 = arith.constant 2 : i64 // CHECK-NEXT: %7 = "math.fpowi"(%dt, %6) : (f32, i64) -> f32 // CHECK-NEXT: %8 = arith.constant -1 : i64 -// CHECK-NEXT: %dt_1 = arith.constant 1.000000e-04 : f32 // CHECK-NEXT: %9 = arith.constant -2 : i64 -// CHECK-NEXT: %10 = "math.fpowi"(%dt_1, %9) : (f32, i64) -> f32 +// CHECK-NEXT: %10 = "math.fpowi"(%dt, %9) : (f32, i64) -> f32 // CHECK-NEXT: %11 = stencil.access %u_t2_blk[0, 0, 0] : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> // CHECK-NEXT: %12 = arith.mulf %10, %11 : f32 // CHECK-NEXT: %13 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %dt_2 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %14 = arith.constant -2 : i64 -// CHECK-NEXT: %15 = "math.fpowi"(%dt_2, %14) : (f32, i64) -> f32 -// CHECK-NEXT: %16 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %17 = arith.mulf %13, %15 : f32 -// CHECK-NEXT: %18 = arith.mulf %17, %16 : f32 -// CHECK-NEXT: %19 = arith.addf %12, %18 : f32 -// CHECK-NEXT: %20 = arith.sitofp %8 : i64 to f32 -// CHECK-NEXT: %21 = arith.mulf %20, %19 : f32 +// CHECK-NEXT: %14 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %15 = arith.mulf %13, %10 : f32 +// CHECK-NEXT: %16 = arith.mulf %15, %14 : f32 +// CHECK-NEXT: %17 = arith.addf %12, %16 : f32 +// CHECK-NEXT: %18 = arith.sitofp %8 : i64 to f32 +// CHECK-NEXT: %19 = arith.mulf %18, %17 : f32 // CHECK-NEXT: %h_x = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %22 = arith.constant -2 : i64 -// CHECK-NEXT: %23 = "math.fpowi"(%h_x, %22) : (f32, i64) -> f32 -// CHECK-NEXT: %24 = stencil.access %u_t0_blk[-1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %25 = arith.mulf %23, %24 : f32 -// CHECK-NEXT: 
%h_x_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %26 = arith.constant -2 : i64 -// CHECK-NEXT: %27 = "math.fpowi"(%h_x_1, %26) : (f32, i64) -> f32 -// CHECK-NEXT: %28 = stencil.access %u_t0_blk[1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %29 = arith.mulf %27, %28 : f32 -// CHECK-NEXT: %30 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %31 = arith.constant -2 : i64 -// CHECK-NEXT: %32 = "math.fpowi"(%h_x_2, %31) : (f32, i64) -> f32 -// CHECK-NEXT: %33 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %34 = arith.mulf %30, %32 : f32 -// CHECK-NEXT: %35 = arith.mulf %34, %33 : f32 -// CHECK-NEXT: %36 = arith.addf %25, %29 : f32 -// CHECK-NEXT: %37 = arith.addf %36, %35 : f32 -// CHECK-NEXT: %h_y = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %38 = arith.constant -2 : i64 -// CHECK-NEXT: %39 = "math.fpowi"(%h_y, %38) : (f32, i64) -> f32 -// CHECK-NEXT: %40 = stencil.access %u_t0_blk[0, -1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %41 = arith.mulf %39, %40 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %42 = arith.constant -2 : i64 -// CHECK-NEXT: %43 = "math.fpowi"(%h_y_1, %42) : (f32, i64) -> f32 -// CHECK-NEXT: %44 = stencil.access %u_t0_blk[0, 1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %45 = arith.mulf %43, %44 : f32 -// CHECK-NEXT: %46 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %47 = arith.constant -2 : i64 -// CHECK-NEXT: %48 = "math.fpowi"(%h_y_2, %47) : (f32, i64) -> f32 -// CHECK-NEXT: %49 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %50 = arith.mulf %46, %48 : f32 -// CHECK-NEXT: %51 = arith.mulf %50, %49 : f32 -// CHECK-NEXT: %52 = arith.addf %41, %45 : f32 -// CHECK-NEXT: %53 = arith.addf %52, %51 : 
f32 -// CHECK-NEXT: %h_z = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %54 = arith.constant -2 : i64 -// CHECK-NEXT: %55 = "math.fpowi"(%h_z, %54) : (f32, i64) -> f32 -// CHECK-NEXT: %56 = stencil.access %u_t0_blk[0, 0, -1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %57 = arith.mulf %55, %56 : f32 -// CHECK-NEXT: %h_z_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %58 = arith.constant -2 : i64 -// CHECK-NEXT: %59 = "math.fpowi"(%h_z_1, %58) : (f32, i64) -> f32 -// CHECK-NEXT: %60 = stencil.access %u_t0_blk[0, 0, 1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %61 = arith.mulf %59, %60 : f32 -// CHECK-NEXT: %62 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_z_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %63 = arith.constant -2 : i64 -// CHECK-NEXT: %64 = "math.fpowi"(%h_z_2, %63) : (f32, i64) -> f32 -// CHECK-NEXT: %65 = stencil.access %u_t0_blk[0, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> -// CHECK-NEXT: %66 = arith.mulf %62, %64 : f32 -// CHECK-NEXT: %67 = arith.mulf %66, %65 : f32 -// CHECK-NEXT: %68 = arith.addf %57, %61 : f32 -// CHECK-NEXT: %69 = arith.addf %68, %67 : f32 -// CHECK-NEXT: %70 = arith.addf %21, %37 : f32 -// CHECK-NEXT: %71 = arith.addf %70, %53 : f32 -// CHECK-NEXT: %72 = arith.addf %71, %69 : f32 -// CHECK-NEXT: %73 = arith.mulf %7, %72 : f32 -// CHECK-NEXT: stencil.return %73 : f32 +// CHECK-NEXT: %20 = "math.fpowi"(%h_x, %9) : (f32, i64) -> f32 +// CHECK-NEXT: %21 = stencil.access %u_t0_blk[-1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %22 = arith.mulf %20, %21 : f32 +// CHECK-NEXT: %23 = stencil.access %u_t0_blk[1, 0, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %24 = arith.mulf %20, %23 : f32 +// CHECK-NEXT: %25 = arith.mulf %13, %20 : f32 +// CHECK-NEXT: %26 = arith.mulf %25, %14 : f32 +// CHECK-NEXT: %27 = arith.addf %22, %24 : f32 +// CHECK-NEXT: %28 = arith.addf %27, %26 : f32 +// CHECK-NEXT: %29 = 
stencil.access %u_t0_blk[0, -1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %30 = arith.mulf %20, %29 : f32 +// CHECK-NEXT: %31 = stencil.access %u_t0_blk[0, 1, 0] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %32 = arith.mulf %20, %31 : f32 +// CHECK-NEXT: %33 = arith.addf %30, %32 : f32 +// CHECK-NEXT: %34 = arith.addf %33, %26 : f32 +// CHECK-NEXT: %35 = stencil.access %u_t0_blk[0, 0, -1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %36 = arith.mulf %20, %35 : f32 +// CHECK-NEXT: %37 = stencil.access %u_t0_blk[0, 0, 1] : !stencil.temp<[-1,52]x[-1,102]x[-1,102]xf32> +// CHECK-NEXT: %38 = arith.mulf %20, %37 : f32 +// CHECK-NEXT: %39 = arith.addf %36, %38 : f32 +// CHECK-NEXT: %40 = arith.addf %39, %26 : f32 +// CHECK-NEXT: %41 = arith.addf %19, %28 : f32 +// CHECK-NEXT: %42 = arith.addf %41, %34 : f32 +// CHECK-NEXT: %43 = arith.addf %42, %40 : f32 +// CHECK-NEXT: %44 = arith.mulf %7, %43 : f32 +// CHECK-NEXT: stencil.return %44 : f32 // CHECK-NEXT: } -// CHECK-NEXT: stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> +// CHECK-NEXT: stencil.store %u_t1_temp to %u_t1(<[0, 0, 0], [51, 101, 101]>) : !stencil.temp<[0,51]x[0,101]x[0,101]xf32> to !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32> // CHECK-NEXT: } // CHECK-NEXT: %6 = func.call @timer_end(%0) : (f64) -> f64 @@ -210,4 +169,4 @@ builtin.module { // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 // CHECK-NEXT: func.func private @timer_end(f64) -> f64 -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: } diff --git a/tests/filecheck/xdsl_mpi_pipeline_c.mlir b/tests/filecheck/xdsl_mpi_pipeline_c.mlir index e94a7d2504..0ab85d4468 100644 --- 
a/tests/filecheck/xdsl_mpi_pipeline_c.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_c.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,canonicalize,cse" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { @@ -111,127 +111,322 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 // CHECK-NEXT: %time_m = arith.constant 1 : index // CHECK-NEXT: %time_M = arith.constant 20 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index -// CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3, %4, %5 = scf.for %time = %time_m to %2 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32>) { -// CHECK-NEXT: %u_t1_storeview = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %u_t0_loadview = "memref.subview"(%u_t0) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: "dmp.swap"(%u_t0_loadview) {"topo" = #dmp.topo<2x1x1>, "swaps" = [#dmp.exchange, 
#dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange]} : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> () -// CHECK-NEXT: %u_t2_loadview = "memref.subview"(%u_t2) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %6 = arith.constant 0 : index -// CHECK-NEXT: %7 = arith.constant 0 : index -// CHECK-NEXT: %8 = arith.constant 0 : index -// CHECK-NEXT: %9 = arith.constant 1 : index -// CHECK-NEXT: %10 = arith.constant 1 : index -// CHECK-NEXT: %11 = arith.constant 1 : index -// CHECK-NEXT: %12 = arith.constant 51 : index -// CHECK-NEXT: %13 = arith.constant 101 : index -// CHECK-NEXT: %14 = arith.constant 101 : index -// CHECK-NEXT: "scf.parallel"(%6, %7, %8, %12, %13, %14, %9, %10, %11) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^0(%15 : index, %16 : index, %17 : index): +// CHECK-NEXT: %1 = arith.addi %time_M, %time_m : index +// CHECK-NEXT: %2 = arith.constant 12 : i32 +// CHECK-NEXT: %3 = "mpi.allocate"(%2) {"dtype" = !mpi.request} : (i32) -> !mpi.vector +// CHECK-NEXT: %4 = "mpi.comm.rank"() : () -> i32 +// CHECK-NEXT: %send_buff_ex0 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %send_buff_ex0_ptr, %5, %6 = "mpi.unwrap_memref"(%send_buff_ex0) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex0 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %recv_buff_ex0_ptr, %7, %8 = "mpi.unwrap_memref"(%recv_buff_ex0) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex1 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %send_buff_ex1_ptr, %9, %10 = "mpi.unwrap_memref"(%send_buff_ex1) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex1 = memref.alloc() 
{"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %recv_buff_ex1_ptr, %11, %12 = "mpi.unwrap_memref"(%recv_buff_ex1) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex2 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex2_ptr, %13, %14 = "mpi.unwrap_memref"(%send_buff_ex2) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex2 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex2_ptr, %15, %16 = "mpi.unwrap_memref"(%recv_buff_ex2) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex3 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex3_ptr, %17, %18 = "mpi.unwrap_memref"(%send_buff_ex3) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex3 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex3_ptr, %19, %20 = "mpi.unwrap_memref"(%recv_buff_ex3) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex4 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex4_ptr, %21, %22 = "mpi.unwrap_memref"(%send_buff_ex4) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex4 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex4_ptr, %23, %24 = "mpi.unwrap_memref"(%recv_buff_ex4) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex5 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex5_ptr, %25, %26 = "mpi.unwrap_memref"(%send_buff_ex5) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex5 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex5_ptr, %27, %28 = 
"mpi.unwrap_memref"(%recv_buff_ex5) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %29, %30, %31 = scf.for %time = %time_m to %1 step %time_m iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32>) { +// CHECK-NEXT: %32 = arith.constant 0 : i32 +// CHECK-NEXT: %33 = arith.constant 1 : i32 +// CHECK-NEXT: %34 = arith.divui %4, %33 : i32 +// CHECK-NEXT: %35 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %36 = arith.divui %35, %33 : i32 +// CHECK-NEXT: %37 = arith.remui %35, %33 : i32 +// CHECK-NEXT: %38 = arith.divui %37, %33 : i32 +// CHECK-NEXT: %39 = arith.remui %37, %33 : i32 +// CHECK-NEXT: %40 = arith.addi %34, %33 : i32 +// CHECK-NEXT: %41 = arith.constant 2 : i32 +// CHECK-NEXT: %42 = arith.cmpi slt, %40, %41 : i32 +// CHECK-NEXT: %43 = arith.constant true +// CHECK-NEXT: %44 = arith.andi %42, %43 : i1 +// CHECK-NEXT: %45 = arith.andi %44, %43 : i1 +// CHECK-NEXT: %46 = arith.muli %33, %40 : i32 +// CHECK-NEXT: %47 = arith.addi %38, %46 : i32 +// CHECK-NEXT: %48 = arith.muli %33, %36 : i32 +// CHECK-NEXT: %49 = arith.addi %47, %48 : i32 +// CHECK-NEXT: %50 = arith.constant 6 : i32 +// CHECK-NEXT: %51 = "mpi.vector_get"(%3, %32) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %52 = "mpi.vector_get"(%3, %50) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%45) ({ +// CHECK-NEXT: %53 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %54 = memref.subview %53[52, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 573512>> +// CHECK-NEXT: "memref.copy"(%54, %send_buff_ex0) : (memref<101x101xf32, strided<[105, 1], offset: 573512>>, memref<101x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex0_ptr, %5, %6, %49, %32, %51) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: 
"mpi.irecv"(%recv_buff_ex0_ptr, %7, %8, %49, %32, %52) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%51) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%52) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %55 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %56 = arith.divui %55, %33 : i32 +// CHECK-NEXT: %57 = arith.remui %55, %33 : i32 +// CHECK-NEXT: %58 = arith.divui %57, %33 : i32 +// CHECK-NEXT: %59 = arith.remui %57, %33 : i32 +// CHECK-NEXT: %60 = arith.constant -1 : i32 +// CHECK-NEXT: %61 = arith.addi %34, %60 : i32 +// CHECK-NEXT: %62 = arith.cmpi sge, %61, %32 : i32 +// CHECK-NEXT: %63 = arith.andi %62, %43 : i1 +// CHECK-NEXT: %64 = arith.andi %63, %43 : i1 +// CHECK-NEXT: %65 = arith.muli %33, %61 : i32 +// CHECK-NEXT: %66 = arith.addi %58, %65 : i32 +// CHECK-NEXT: %67 = arith.muli %33, %56 : i32 +// CHECK-NEXT: %68 = arith.addi %66, %67 : i32 +// CHECK-NEXT: %69 = arith.constant 7 : i32 +// CHECK-NEXT: %70 = "mpi.vector_get"(%3, %33) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %71 = "mpi.vector_get"(%3, %69) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%64) ({ +// CHECK-NEXT: %72 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %73 = memref.subview %72[2, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%73, %send_buff_ex1) : (memref<101x101xf32, strided<[105, 1], offset: 22262>>, memref<101x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex1_ptr, %9, %10, %68, %32, %70) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex1_ptr, %11, %12, %68, %32, %71) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, 
{ +// CHECK-NEXT: "mpi.request_null"(%70) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%71) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %74 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %75 = arith.divui %74, %33 : i32 +// CHECK-NEXT: %76 = arith.remui %74, %33 : i32 +// CHECK-NEXT: %77 = arith.divui %76, %33 : i32 +// CHECK-NEXT: %78 = arith.remui %76, %33 : i32 +// CHECK-NEXT: %79 = arith.addi %75, %33 : i32 +// CHECK-NEXT: %80 = arith.cmpi slt, %79, %33 : i32 +// CHECK-NEXT: %81 = arith.andi %43, %80 : i1 +// CHECK-NEXT: %82 = arith.andi %81, %43 : i1 +// CHECK-NEXT: %83 = arith.muli %33, %34 : i32 +// CHECK-NEXT: %84 = arith.addi %77, %83 : i32 +// CHECK-NEXT: %85 = arith.muli %33, %79 : i32 +// CHECK-NEXT: %86 = arith.addi %84, %85 : i32 +// CHECK-NEXT: %87 = arith.constant 8 : i32 +// CHECK-NEXT: %88 = "mpi.vector_get"(%3, %41) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %89 = "mpi.vector_get"(%3, %87) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%82) ({ +// CHECK-NEXT: %90 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %91 = memref.subview %90[2, 102, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 32762>> +// CHECK-NEXT: "memref.copy"(%91, %send_buff_ex2) : (memref<51x101xf32, strided<[11025, 1], offset: 32762>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex2_ptr, %13, %14, %86, %32, %88) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex2_ptr, %15, %16, %86, %32, %89) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%88) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%89) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %92 = 
arith.remui %4, %33 : i32 +// CHECK-NEXT: %93 = arith.divui %92, %33 : i32 +// CHECK-NEXT: %94 = arith.remui %92, %33 : i32 +// CHECK-NEXT: %95 = arith.divui %94, %33 : i32 +// CHECK-NEXT: %96 = arith.remui %94, %33 : i32 +// CHECK-NEXT: %97 = arith.addi %93, %60 : i32 +// CHECK-NEXT: %98 = arith.cmpi sge, %97, %32 : i32 +// CHECK-NEXT: %99 = arith.andi %43, %98 : i1 +// CHECK-NEXT: %100 = arith.andi %99, %43 : i1 +// CHECK-NEXT: %101 = arith.addi %95, %83 : i32 +// CHECK-NEXT: %102 = arith.muli %33, %97 : i32 +// CHECK-NEXT: %103 = arith.addi %101, %102 : i32 +// CHECK-NEXT: %104 = arith.constant 3 : i32 +// CHECK-NEXT: %105 = arith.constant 9 : i32 +// CHECK-NEXT: %106 = "mpi.vector_get"(%3, %104) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %107 = "mpi.vector_get"(%3, %105) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%100) ({ +// CHECK-NEXT: %108 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %109 = memref.subview %108[2, 2, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%109, %send_buff_ex3) : (memref<51x101xf32, strided<[11025, 1], offset: 22262>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex3_ptr, %17, %18, %103, %32, %106) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex3_ptr, %19, %20, %103, %32, %107) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%106) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%107) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %110 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %111 = arith.divui %110, %33 : i32 +// CHECK-NEXT: %112 = arith.remui %110, %33 : i32 +// CHECK-NEXT: %113 = arith.divui %112, %33 : i32 +// 
CHECK-NEXT: %114 = arith.remui %112, %33 : i32 +// CHECK-NEXT: %115 = arith.addi %113, %33 : i32 +// CHECK-NEXT: %116 = arith.cmpi slt, %115, %33 : i32 +// CHECK-NEXT: %117 = arith.andi %43, %43 : i1 +// CHECK-NEXT: %118 = arith.andi %117, %116 : i1 +// CHECK-NEXT: %119 = arith.addi %115, %83 : i32 +// CHECK-NEXT: %120 = arith.muli %33, %111 : i32 +// CHECK-NEXT: %121 = arith.addi %119, %120 : i32 +// CHECK-NEXT: %122 = arith.constant 4 : i32 +// CHECK-NEXT: %123 = arith.constant 10 : i32 +// CHECK-NEXT: %124 = "mpi.vector_get"(%3, %122) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %125 = "mpi.vector_get"(%3, %123) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%118) ({ +// CHECK-NEXT: %126 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %127 = memref.subview %126[2, 2, 102] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22362>> +// CHECK-NEXT: "memref.copy"(%127, %send_buff_ex4) : (memref<51x101xf32, strided<[11025, 105], offset: 22362>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex4_ptr, %21, %22, %121, %32, %124) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex4_ptr, %23, %24, %121, %32, %125) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%124) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%125) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %128 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %129 = arith.divui %128, %33 : i32 +// CHECK-NEXT: %130 = arith.remui %128, %33 : i32 +// CHECK-NEXT: %131 = arith.divui %130, %33 : i32 +// CHECK-NEXT: %132 = arith.remui %130, %33 : i32 +// CHECK-NEXT: %133 = arith.addi %131, %60 : i32 +// CHECK-NEXT: %134 = arith.cmpi sge, %133, %32 : i32 +// 
CHECK-NEXT: %135 = arith.andi %117, %134 : i1 +// CHECK-NEXT: %136 = arith.addi %133, %83 : i32 +// CHECK-NEXT: %137 = arith.muli %33, %129 : i32 +// CHECK-NEXT: %138 = arith.addi %136, %137 : i32 +// CHECK-NEXT: %139 = arith.constant 5 : i32 +// CHECK-NEXT: %140 = arith.constant 11 : i32 +// CHECK-NEXT: %141 = "mpi.vector_get"(%3, %139) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %142 = "mpi.vector_get"(%3, %140) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%135) ({ +// CHECK-NEXT: %143 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %144 = memref.subview %143[2, 2, 2] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%144, %send_buff_ex5) : (memref<51x101xf32, strided<[11025, 105], offset: 22262>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex5_ptr, %25, %26, %138, %32, %141) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex5_ptr, %27, %28, %138, %32, %142) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%141) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%142) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "mpi.waitall"(%3, %2) : (!mpi.vector, i32) -> () +// CHECK-NEXT: "scf.if"(%45) ({ +// CHECK-NEXT: %145 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %146 = memref.subview %145[53, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 584537>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex0, %146) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 584537>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: 
scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%64) ({ +// CHECK-NEXT: %147 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %148 = memref.subview %147[1, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 11237>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex1, %148) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 11237>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%82) ({ +// CHECK-NEXT: %149 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %150 = memref.subview %149[2, 103, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 32867>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex2, %150) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 32867>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%100) ({ +// CHECK-NEXT: %151 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %152 = memref.subview %151[2, 1, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 22157>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex3, %152) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 22157>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%118) ({ +// CHECK-NEXT: %153 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %154 = memref.subview %153[2, 2, 103] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22363>> 
+// CHECK-NEXT: "memref.copy"(%recv_buff_ex4, %154) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22363>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%135) ({ +// CHECK-NEXT: %155 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %156 = memref.subview %155[2, 2, 1] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22261>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex5, %156) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22261>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %157 = memref.subview %u_t1[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %u_t0_blk = memref.subview %u_t0[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %u_t2_blk = memref.subview %u_t2[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %158 = arith.constant 0 : index +// CHECK-NEXT: %159 = arith.constant 51 : index +// CHECK-NEXT: %160 = arith.constant 101 : index +// CHECK-NEXT: "scf.parallel"(%158, %158, %158, %159, %160, %160, %time_m, %time_m, %time_m) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^0(%161 : index, %162 : index, %163 : index): // CHECK-NEXT: %dt = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %18 = arith.constant 2 : i64 -// CHECK-NEXT: %19 = "math.fpowi"(%dt, %18) : (f32, i64) -> f32 -// CHECK-NEXT: %20 = arith.constant -1 : i64 -// CHECK-NEXT: %dt_1 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %21 = arith.constant -2 : i64 -// CHECK-NEXT: %22 = 
"math.fpowi"(%dt_1, %21) : (f32, i64) -> f32 -// CHECK-NEXT: %23 = memref.load %u_t2_loadview[%15, %16, %17] : memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %24 = arith.mulf %22, %23 : f32 -// CHECK-NEXT: %25 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %dt_2 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %26 = arith.constant -2 : i64 -// CHECK-NEXT: %27 = "math.fpowi"(%dt_2, %26) : (f32, i64) -> f32 -// CHECK-NEXT: %28 = memref.load %u_t0_loadview[%15, %16, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %29 = arith.mulf %25, %27 : f32 -// CHECK-NEXT: %30 = arith.mulf %29, %28 : f32 -// CHECK-NEXT: %31 = arith.addf %24, %30 : f32 -// CHECK-NEXT: %32 = arith.sitofp %20 : i64 to f32 -// CHECK-NEXT: %33 = arith.mulf %32, %31 : f32 +// CHECK-NEXT: %164 = arith.constant 2 : i64 +// CHECK-NEXT: %165 = "math.fpowi"(%dt, %164) : (f32, i64) -> f32 +// CHECK-NEXT: %166 = arith.constant -1 : i64 +// CHECK-NEXT: %167 = arith.constant -2 : i64 +// CHECK-NEXT: %168 = "math.fpowi"(%dt, %167) : (f32, i64) -> f32 +// CHECK-NEXT: %169 = memref.load %u_t2_blk[%161, %162, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %170 = arith.mulf %168, %169 : f32 +// CHECK-NEXT: %171 = arith.constant -2.000000e+00 : f32 +// CHECK-NEXT: %172 = memref.load %u_t0_blk[%161, %162, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %173 = arith.mulf %171, %168 : f32 +// CHECK-NEXT: %174 = arith.mulf %173, %172 : f32 +// CHECK-NEXT: %175 = arith.addf %170, %174 : f32 +// CHECK-NEXT: %176 = arith.sitofp %166 : i64 to f32 +// CHECK-NEXT: %177 = arith.mulf %176, %175 : f32 // CHECK-NEXT: %h_x = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %34 = arith.constant -2 : i64 -// CHECK-NEXT: %35 = "math.fpowi"(%h_x, %34) : (f32, i64) -> f32 -// CHECK-NEXT: %36 = arith.constant -1 : index -// CHECK-NEXT: %37 = arith.addi %15, %36 : index -// 
CHECK-NEXT: %38 = memref.load %u_t0_loadview[%37, %16, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %39 = arith.mulf %35, %38 : f32 -// CHECK-NEXT: %h_x_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %40 = arith.constant -2 : i64 -// CHECK-NEXT: %41 = "math.fpowi"(%h_x_1, %40) : (f32, i64) -> f32 -// CHECK-NEXT: %42 = arith.constant 1 : index -// CHECK-NEXT: %43 = arith.addi %15, %42 : index -// CHECK-NEXT: %44 = memref.load %u_t0_loadview[%43, %16, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %45 = arith.mulf %41, %44 : f32 -// CHECK-NEXT: %46 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %47 = arith.constant -2 : i64 -// CHECK-NEXT: %48 = "math.fpowi"(%h_x_2, %47) : (f32, i64) -> f32 -// CHECK-NEXT: %49 = memref.load %u_t0_loadview[%15, %16, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %50 = arith.mulf %46, %48 : f32 -// CHECK-NEXT: %51 = arith.mulf %50, %49 : f32 -// CHECK-NEXT: %52 = arith.addf %39, %45 : f32 -// CHECK-NEXT: %53 = arith.addf %52, %51 : f32 -// CHECK-NEXT: %h_y = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %54 = arith.constant -2 : i64 -// CHECK-NEXT: %55 = "math.fpowi"(%h_y, %54) : (f32, i64) -> f32 -// CHECK-NEXT: %56 = arith.constant -1 : index -// CHECK-NEXT: %57 = arith.addi %16, %56 : index -// CHECK-NEXT: %58 = memref.load %u_t0_loadview[%15, %57, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %59 = arith.mulf %55, %58 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %60 = arith.constant -2 : i64 -// CHECK-NEXT: %61 = "math.fpowi"(%h_y_1, %60) : (f32, i64) -> f32 -// CHECK-NEXT: %62 = arith.constant 1 : index -// CHECK-NEXT: %63 = arith.addi %16, %62 : index -// CHECK-NEXT: %64 = memref.load %u_t0_loadview[%15, %63, %17] : memref<53x103x103xf32, strided<[11025, 105, 
1], offset: 22262>> -// CHECK-NEXT: %65 = arith.mulf %61, %64 : f32 -// CHECK-NEXT: %66 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %67 = arith.constant -2 : i64 -// CHECK-NEXT: %68 = "math.fpowi"(%h_y_2, %67) : (f32, i64) -> f32 -// CHECK-NEXT: %69 = memref.load %u_t0_loadview[%15, %16, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %70 = arith.mulf %66, %68 : f32 -// CHECK-NEXT: %71 = arith.mulf %70, %69 : f32 -// CHECK-NEXT: %72 = arith.addf %59, %65 : f32 -// CHECK-NEXT: %73 = arith.addf %72, %71 : f32 -// CHECK-NEXT: %h_z = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %74 = arith.constant -2 : i64 -// CHECK-NEXT: %75 = "math.fpowi"(%h_z, %74) : (f32, i64) -> f32 -// CHECK-NEXT: %76 = arith.constant -1 : index -// CHECK-NEXT: %77 = arith.addi %17, %76 : index -// CHECK-NEXT: %78 = memref.load %u_t0_loadview[%15, %16, %77] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %79 = arith.mulf %75, %78 : f32 -// CHECK-NEXT: %h_z_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %80 = arith.constant -2 : i64 -// CHECK-NEXT: %81 = "math.fpowi"(%h_z_1, %80) : (f32, i64) -> f32 -// CHECK-NEXT: %82 = arith.constant 1 : index -// CHECK-NEXT: %83 = arith.addi %17, %82 : index -// CHECK-NEXT: %84 = memref.load %u_t0_loadview[%15, %16, %83] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %85 = arith.mulf %81, %84 : f32 -// CHECK-NEXT: %86 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_z_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %87 = arith.constant -2 : i64 -// CHECK-NEXT: %88 = "math.fpowi"(%h_z_2, %87) : (f32, i64) -> f32 -// CHECK-NEXT: %89 = memref.load %u_t0_loadview[%15, %16, %17] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %90 = arith.mulf %86, %88 : f32 -// CHECK-NEXT: %91 = arith.mulf %90, %89 : f32 -// CHECK-NEXT: %92 = 
arith.addf %79, %85 : f32 -// CHECK-NEXT: %93 = arith.addf %92, %91 : f32 -// CHECK-NEXT: %94 = arith.addf %33, %53 : f32 -// CHECK-NEXT: %95 = arith.addf %94, %73 : f32 -// CHECK-NEXT: %96 = arith.addf %95, %93 : f32 -// CHECK-NEXT: %97 = arith.mulf %19, %96 : f32 -// CHECK-NEXT: memref.store %97, %u_t1_storeview[%15, %16, %17] : memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %178 = "math.fpowi"(%h_x, %167) : (f32, i64) -> f32 +// CHECK-NEXT: %179 = arith.constant -1 : index +// CHECK-NEXT: %180 = arith.addi %161, %179 : index +// CHECK-NEXT: %181 = memref.load %u_t0_blk[%180, %162, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %182 = arith.mulf %178, %181 : f32 +// CHECK-NEXT: %183 = arith.addi %161, %time_m : index +// CHECK-NEXT: %184 = memref.load %u_t0_blk[%183, %162, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %185 = arith.mulf %178, %184 : f32 +// CHECK-NEXT: %186 = arith.mulf %171, %178 : f32 +// CHECK-NEXT: %187 = arith.mulf %186, %172 : f32 +// CHECK-NEXT: %188 = arith.addf %182, %185 : f32 +// CHECK-NEXT: %189 = arith.addf %188, %187 : f32 +// CHECK-NEXT: %190 = arith.addi %162, %179 : index +// CHECK-NEXT: %191 = memref.load %u_t0_blk[%161, %190, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %192 = arith.mulf %178, %191 : f32 +// CHECK-NEXT: %193 = arith.addi %162, %time_m : index +// CHECK-NEXT: %194 = memref.load %u_t0_blk[%161, %193, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %195 = arith.mulf %178, %194 : f32 +// CHECK-NEXT: %196 = arith.addf %192, %195 : f32 +// CHECK-NEXT: %197 = arith.addf %196, %187 : f32 +// CHECK-NEXT: %198 = arith.addi %163, %179 : index +// CHECK-NEXT: %199 = memref.load %u_t0_blk[%161, %162, %198] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %200 = arith.mulf %178, %199 : f32 +// 
CHECK-NEXT: %201 = arith.addi %163, %time_m : index +// CHECK-NEXT: %202 = memref.load %u_t0_blk[%161, %162, %201] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %203 = arith.mulf %178, %202 : f32 +// CHECK-NEXT: %204 = arith.addf %200, %203 : f32 +// CHECK-NEXT: %205 = arith.addf %204, %187 : f32 +// CHECK-NEXT: %206 = arith.addf %177, %189 : f32 +// CHECK-NEXT: %207 = arith.addf %206, %197 : f32 +// CHECK-NEXT: %208 = arith.addf %207, %205 : f32 +// CHECK-NEXT: %209 = arith.mulf %165, %208 : f32 +// CHECK-NEXT: memref.store %209, %157[%161, %162, %163] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index, index, index, index) -> () -// CHECK-NEXT: %u_t1_temp = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32> // CHECK-NEXT: } -// CHECK-NEXT: %98 = func.call @timer_end(%0) : (f64) -> f64 -// CHECK-NEXT: "llvm.store"(%98, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () +// CHECK-NEXT: %210 = func.call @timer_end(%0) : (f64) -> f64 +// CHECK-NEXT: "llvm.store"(%210, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () // CHECK-NEXT: func.return // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 // CHECK-NEXT: func.func private @timer_end(f64) -> f64 -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: } diff --git a/tests/filecheck/xdsl_mpi_pipeline_d.mlir b/tests/filecheck/xdsl_mpi_pipeline_d.mlir index 24460a858d..d086da064e 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_d.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_d.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p 
"canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0}" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0},canonicalize,cse" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { @@ -111,140 +111,332 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 // CHECK-NEXT: %time_m = arith.constant 1 : index // CHECK-NEXT: %time_M = arith.constant 20 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index -// CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3, %4, %5 = scf.for %time = %time_m to %2 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32>) { -// CHECK-NEXT: %u_t1_storeview = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %u_t0_loadview = "memref.subview"(%u_t0) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: "dmp.swap"(%u_t0_loadview) {"topo" = #dmp.topo<2x1x1>, "swaps" = [#dmp.exchange, 
#dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange, #dmp.exchange]} : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> () -// CHECK-NEXT: %u_t2_loadview = "memref.subview"(%u_t2) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %6 = arith.constant 0 : index -// CHECK-NEXT: %7 = arith.constant 0 : index -// CHECK-NEXT: %8 = arith.constant 0 : index -// CHECK-NEXT: %9 = arith.constant 1 : index -// CHECK-NEXT: %10 = arith.constant 1 : index -// CHECK-NEXT: %11 = arith.constant 1 : index -// CHECK-NEXT: %12 = arith.constant 51 : index -// CHECK-NEXT: %13 = arith.constant 101 : index -// CHECK-NEXT: %14 = arith.constant 101 : index -// CHECK-NEXT: %15 = arith.constant 0 : index -// CHECK-NEXT: %16 = arith.constant 64 : index -// CHECK-NEXT: %17 = arith.constant 64 : index -// CHECK-NEXT: %18 = arith.muli %9, %16 : index -// CHECK-NEXT: %19 = arith.muli %10, %17 : index -// CHECK-NEXT: "scf.parallel"(%6, %7, %12, %13, %18, %19) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^0(%20 : index, %21 : index): -// CHECK-NEXT: %22 = "affine.min"(%16, %12, %20) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: %23 = "affine.min"(%17, %13, %21) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: "scf.parallel"(%15, %15, %8, %22, %23, %14, %9, %10, %11) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^1(%24 : index, %25 : index, %26 : index): -// CHECK-NEXT: %27 = arith.addi %20, %24 : index -// CHECK-NEXT: %28 = arith.addi %21, %25 : index +// CHECK-NEXT: %1 = arith.addi %time_M, %time_m : index +// CHECK-NEXT: %2 = arith.constant 12 : i32 +// CHECK-NEXT: %3 = "mpi.allocate"(%2) {"dtype" = !mpi.request} : (i32) -> !mpi.vector +// CHECK-NEXT: %4 = 
"mpi.comm.rank"() : () -> i32 +// CHECK-NEXT: %send_buff_ex0 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %send_buff_ex0_ptr, %5, %6 = "mpi.unwrap_memref"(%send_buff_ex0) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex0 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %recv_buff_ex0_ptr, %7, %8 = "mpi.unwrap_memref"(%recv_buff_ex0) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex1 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %send_buff_ex1_ptr, %9, %10 = "mpi.unwrap_memref"(%send_buff_ex1) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex1 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> +// CHECK-NEXT: %recv_buff_ex1_ptr, %11, %12 = "mpi.unwrap_memref"(%recv_buff_ex1) : (memref<101x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex2 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex2_ptr, %13, %14 = "mpi.unwrap_memref"(%send_buff_ex2) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex2 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex2_ptr, %15, %16 = "mpi.unwrap_memref"(%recv_buff_ex2) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex3 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex3_ptr, %17, %18 = "mpi.unwrap_memref"(%send_buff_ex3) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex3 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex3_ptr, %19, %20 = "mpi.unwrap_memref"(%recv_buff_ex3) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex4 = memref.alloc() {"alignment" = 64 : i64} : 
memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex4_ptr, %21, %22 = "mpi.unwrap_memref"(%send_buff_ex4) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex4 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex4_ptr, %23, %24 = "mpi.unwrap_memref"(%recv_buff_ex4) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %send_buff_ex5 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %send_buff_ex5_ptr, %25, %26 = "mpi.unwrap_memref"(%send_buff_ex5) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %recv_buff_ex5 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> +// CHECK-NEXT: %recv_buff_ex5_ptr, %27, %28 = "mpi.unwrap_memref"(%recv_buff_ex5) : (memref<51x101xf32>) -> (!llvm.ptr, i32, !mpi.datatype) +// CHECK-NEXT: %29, %30, %31 = scf.for %time = %time_m to %1 step %time_m iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32>) { +// CHECK-NEXT: %32 = arith.constant 0 : i32 +// CHECK-NEXT: %33 = arith.constant 1 : i32 +// CHECK-NEXT: %34 = arith.divui %4, %33 : i32 +// CHECK-NEXT: %35 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %36 = arith.divui %35, %33 : i32 +// CHECK-NEXT: %37 = arith.remui %35, %33 : i32 +// CHECK-NEXT: %38 = arith.divui %37, %33 : i32 +// CHECK-NEXT: %39 = arith.remui %37, %33 : i32 +// CHECK-NEXT: %40 = arith.addi %34, %33 : i32 +// CHECK-NEXT: %41 = arith.constant 2 : i32 +// CHECK-NEXT: %42 = arith.cmpi slt, %40, %41 : i32 +// CHECK-NEXT: %43 = arith.constant true +// CHECK-NEXT: %44 = arith.andi %42, %43 : i1 +// CHECK-NEXT: %45 = arith.andi %44, %43 : i1 +// CHECK-NEXT: %46 = arith.muli %33, %40 : i32 +// CHECK-NEXT: %47 = arith.addi %38, %46 : i32 +// CHECK-NEXT: %48 = arith.muli %33, %36 : i32 +// CHECK-NEXT: %49 = arith.addi %47, %48 : i32 +// CHECK-NEXT: %50 = arith.constant 6 : i32 +// CHECK-NEXT: %51 
= "mpi.vector_get"(%3, %32) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %52 = "mpi.vector_get"(%3, %50) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%45) ({ +// CHECK-NEXT: %53 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %54 = memref.subview %53[52, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 573512>> +// CHECK-NEXT: "memref.copy"(%54, %send_buff_ex0) : (memref<101x101xf32, strided<[105, 1], offset: 573512>>, memref<101x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex0_ptr, %5, %6, %49, %32, %51) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex0_ptr, %7, %8, %49, %32, %52) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%51) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%52) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %55 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %56 = arith.divui %55, %33 : i32 +// CHECK-NEXT: %57 = arith.remui %55, %33 : i32 +// CHECK-NEXT: %58 = arith.divui %57, %33 : i32 +// CHECK-NEXT: %59 = arith.remui %57, %33 : i32 +// CHECK-NEXT: %60 = arith.constant -1 : i32 +// CHECK-NEXT: %61 = arith.addi %34, %60 : i32 +// CHECK-NEXT: %62 = arith.cmpi sge, %61, %32 : i32 +// CHECK-NEXT: %63 = arith.andi %62, %43 : i1 +// CHECK-NEXT: %64 = arith.andi %63, %43 : i1 +// CHECK-NEXT: %65 = arith.muli %33, %61 : i32 +// CHECK-NEXT: %66 = arith.addi %58, %65 : i32 +// CHECK-NEXT: %67 = arith.muli %33, %56 : i32 +// CHECK-NEXT: %68 = arith.addi %66, %67 : i32 +// CHECK-NEXT: %69 = arith.constant 7 : i32 +// CHECK-NEXT: %70 = "mpi.vector_get"(%3, %33) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %71 = "mpi.vector_get"(%3, %69) : (!mpi.vector, i32) -> !mpi.request +// 
CHECK-NEXT: "scf.if"(%64) ({ +// CHECK-NEXT: %72 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %73 = memref.subview %72[2, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%73, %send_buff_ex1) : (memref<101x101xf32, strided<[105, 1], offset: 22262>>, memref<101x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex1_ptr, %9, %10, %68, %32, %70) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex1_ptr, %11, %12, %68, %32, %71) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%70) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%71) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %74 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %75 = arith.divui %74, %33 : i32 +// CHECK-NEXT: %76 = arith.remui %74, %33 : i32 +// CHECK-NEXT: %77 = arith.divui %76, %33 : i32 +// CHECK-NEXT: %78 = arith.remui %76, %33 : i32 +// CHECK-NEXT: %79 = arith.addi %75, %33 : i32 +// CHECK-NEXT: %80 = arith.cmpi slt, %79, %33 : i32 +// CHECK-NEXT: %81 = arith.andi %43, %80 : i1 +// CHECK-NEXT: %82 = arith.andi %81, %43 : i1 +// CHECK-NEXT: %83 = arith.muli %33, %34 : i32 +// CHECK-NEXT: %84 = arith.addi %77, %83 : i32 +// CHECK-NEXT: %85 = arith.muli %33, %79 : i32 +// CHECK-NEXT: %86 = arith.addi %84, %85 : i32 +// CHECK-NEXT: %87 = arith.constant 8 : i32 +// CHECK-NEXT: %88 = "mpi.vector_get"(%3, %41) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %89 = "mpi.vector_get"(%3, %87) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%82) ({ +// CHECK-NEXT: %90 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %91 = memref.subview %90[2, 102, 2] [51, 1, 101] 
[1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 32762>> +// CHECK-NEXT: "memref.copy"(%91, %send_buff_ex2) : (memref<51x101xf32, strided<[11025, 1], offset: 32762>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex2_ptr, %13, %14, %86, %32, %88) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex2_ptr, %15, %16, %86, %32, %89) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%88) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%89) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %92 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %93 = arith.divui %92, %33 : i32 +// CHECK-NEXT: %94 = arith.remui %92, %33 : i32 +// CHECK-NEXT: %95 = arith.divui %94, %33 : i32 +// CHECK-NEXT: %96 = arith.remui %94, %33 : i32 +// CHECK-NEXT: %97 = arith.addi %93, %60 : i32 +// CHECK-NEXT: %98 = arith.cmpi sge, %97, %32 : i32 +// CHECK-NEXT: %99 = arith.andi %43, %98 : i1 +// CHECK-NEXT: %100 = arith.andi %99, %43 : i1 +// CHECK-NEXT: %101 = arith.addi %95, %83 : i32 +// CHECK-NEXT: %102 = arith.muli %33, %97 : i32 +// CHECK-NEXT: %103 = arith.addi %101, %102 : i32 +// CHECK-NEXT: %104 = arith.constant 3 : i32 +// CHECK-NEXT: %105 = arith.constant 9 : i32 +// CHECK-NEXT: %106 = "mpi.vector_get"(%3, %104) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %107 = "mpi.vector_get"(%3, %105) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%100) ({ +// CHECK-NEXT: %108 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %109 = memref.subview %108[2, 2, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%109, %send_buff_ex3) : (memref<51x101xf32, strided<[11025, 1], offset: 
22262>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex3_ptr, %17, %18, %103, %32, %106) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex3_ptr, %19, %20, %103, %32, %107) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%106) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%107) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %110 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %111 = arith.divui %110, %33 : i32 +// CHECK-NEXT: %112 = arith.remui %110, %33 : i32 +// CHECK-NEXT: %113 = arith.divui %112, %33 : i32 +// CHECK-NEXT: %114 = arith.remui %112, %33 : i32 +// CHECK-NEXT: %115 = arith.addi %113, %33 : i32 +// CHECK-NEXT: %116 = arith.cmpi slt, %115, %33 : i32 +// CHECK-NEXT: %117 = arith.andi %43, %43 : i1 +// CHECK-NEXT: %118 = arith.andi %117, %116 : i1 +// CHECK-NEXT: %119 = arith.addi %115, %83 : i32 +// CHECK-NEXT: %120 = arith.muli %33, %111 : i32 +// CHECK-NEXT: %121 = arith.addi %119, %120 : i32 +// CHECK-NEXT: %122 = arith.constant 4 : i32 +// CHECK-NEXT: %123 = arith.constant 10 : i32 +// CHECK-NEXT: %124 = "mpi.vector_get"(%3, %122) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %125 = "mpi.vector_get"(%3, %123) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%118) ({ +// CHECK-NEXT: %126 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %127 = memref.subview %126[2, 2, 102] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22362>> +// CHECK-NEXT: "memref.copy"(%127, %send_buff_ex4) : (memref<51x101xf32, strided<[11025, 105], offset: 22362>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex4_ptr, %21, %22, %121, %32, %124) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, 
!mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex4_ptr, %23, %24, %121, %32, %125) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: "mpi.request_null"(%124) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%125) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %128 = arith.remui %4, %33 : i32 +// CHECK-NEXT: %129 = arith.divui %128, %33 : i32 +// CHECK-NEXT: %130 = arith.remui %128, %33 : i32 +// CHECK-NEXT: %131 = arith.divui %130, %33 : i32 +// CHECK-NEXT: %132 = arith.remui %130, %33 : i32 +// CHECK-NEXT: %133 = arith.addi %131, %60 : i32 +// CHECK-NEXT: %134 = arith.cmpi sge, %133, %32 : i32 +// CHECK-NEXT: %135 = arith.andi %117, %134 : i1 +// CHECK-NEXT: %136 = arith.addi %133, %83 : i32 +// CHECK-NEXT: %137 = arith.muli %33, %129 : i32 +// CHECK-NEXT: %138 = arith.addi %136, %137 : i32 +// CHECK-NEXT: %139 = arith.constant 5 : i32 +// CHECK-NEXT: %140 = arith.constant 11 : i32 +// CHECK-NEXT: %141 = "mpi.vector_get"(%3, %139) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: %142 = "mpi.vector_get"(%3, %140) : (!mpi.vector, i32) -> !mpi.request +// CHECK-NEXT: "scf.if"(%135) ({ +// CHECK-NEXT: %143 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %144 = memref.subview %143[2, 2, 2] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%144, %send_buff_ex5) : (memref<51x101xf32, strided<[11025, 105], offset: 22262>>, memref<51x101xf32>) -> () +// CHECK-NEXT: "mpi.isend"(%send_buff_ex5_ptr, %25, %26, %138, %32, %141) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: "mpi.irecv"(%recv_buff_ex5_ptr, %27, %28, %138, %32, %142) : (!llvm.ptr, i32, !mpi.datatype, i32, i32, !mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// 
CHECK-NEXT: "mpi.request_null"(%141) : (!mpi.request) -> () +// CHECK-NEXT: "mpi.request_null"(%142) : (!mpi.request) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "mpi.waitall"(%3, %2) : (!mpi.vector, i32) -> () +// CHECK-NEXT: "scf.if"(%45) ({ +// CHECK-NEXT: %145 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %146 = memref.subview %145[53, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 584537>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex0, %146) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 584537>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%64) ({ +// CHECK-NEXT: %147 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %148 = memref.subview %147[1, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 11237>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex1, %148) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 11237>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%82) ({ +// CHECK-NEXT: %149 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %150 = memref.subview %149[2, 103, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 32867>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex2, %150) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 32867>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%100) ({ +// CHECK-NEXT: %151 = builtin.unrealized_conversion_cast %u_t0 : 
memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %152 = memref.subview %151[2, 1, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 22157>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex3, %152) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 22157>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%118) ({ +// CHECK-NEXT: %153 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %154 = memref.subview %153[2, 2, 103] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22363>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex4, %154) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22363>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: "scf.if"(%135) ({ +// CHECK-NEXT: %155 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %156 = memref.subview %155[2, 2, 1] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22261>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex5, %156) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22261>>) -> () +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }, { +// CHECK-NEXT: scf.yield +// CHECK-NEXT: }) : (i1) -> () +// CHECK-NEXT: %157 = memref.subview %u_t1[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %u_t0_blk = memref.subview %u_t0[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %u_t2_blk = memref.subview %u_t2[2, 2, 2] [55, 105, 105] [1, 1, 1] : 
memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %158 = arith.constant 0 : index +// CHECK-NEXT: %159 = arith.constant 51 : index +// CHECK-NEXT: %160 = arith.constant 101 : index +// CHECK-NEXT: %161 = arith.constant 64 : index +// CHECK-NEXT: %162 = arith.muli %time_m, %161 : index +// CHECK-NEXT: "scf.parallel"(%158, %158, %159, %160, %162, %162) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^0(%163 : index, %164 : index): +// CHECK-NEXT: %165 = "affine.min"(%161, %159, %163) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: %166 = "affine.min"(%161, %160, %164) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: "scf.parallel"(%158, %158, %158, %165, %166, %160, %time_m, %time_m, %time_m) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^1(%167 : index, %168 : index, %169 : index): +// CHECK-NEXT: %170 = arith.addi %163, %167 : index +// CHECK-NEXT: %171 = arith.addi %164, %168 : index // CHECK-NEXT: %dt = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %29 = arith.constant 2 : i64 -// CHECK-NEXT: %30 = "math.fpowi"(%dt, %29) : (f32, i64) -> f32 -// CHECK-NEXT: %31 = arith.constant -1 : i64 -// CHECK-NEXT: %dt_1 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %32 = arith.constant -2 : i64 -// CHECK-NEXT: %33 = "math.fpowi"(%dt_1, %32) : (f32, i64) -> f32 -// CHECK-NEXT: %34 = memref.load %u_t2_loadview[%27, %28, %26] : memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %35 = arith.mulf %33, %34 : f32 -// CHECK-NEXT: %36 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %dt_2 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %37 = arith.constant -2 : i64 -// CHECK-NEXT: %38 = "math.fpowi"(%dt_2, %37) : (f32, i64) -> f32 -// CHECK-NEXT: %39 = memref.load %u_t0_loadview[%27, %28, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], 
offset: 22262>> -// CHECK-NEXT: %40 = arith.mulf %36, %38 : f32 -// CHECK-NEXT: %41 = arith.mulf %40, %39 : f32 -// CHECK-NEXT: %42 = arith.addf %35, %41 : f32 -// CHECK-NEXT: %43 = arith.sitofp %31 : i64 to f32 -// CHECK-NEXT: %44 = arith.mulf %43, %42 : f32 +// CHECK-NEXT: %172 = arith.constant 2 : i64 +// CHECK-NEXT: %173 = "math.fpowi"(%dt, %172) : (f32, i64) -> f32 +// CHECK-NEXT: %174 = arith.constant -1 : i64 +// CHECK-NEXT: %175 = arith.constant -2 : i64 +// CHECK-NEXT: %176 = "math.fpowi"(%dt, %175) : (f32, i64) -> f32 +// CHECK-NEXT: %177 = memref.load %u_t2_blk[%170, %171, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %178 = arith.mulf %176, %177 : f32 +// CHECK-NEXT: %179 = arith.constant -2.000000e+00 : f32 +// CHECK-NEXT: %180 = memref.load %u_t0_blk[%170, %171, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %181 = arith.mulf %179, %176 : f32 +// CHECK-NEXT: %182 = arith.mulf %181, %180 : f32 +// CHECK-NEXT: %183 = arith.addf %178, %182 : f32 +// CHECK-NEXT: %184 = arith.sitofp %174 : i64 to f32 +// CHECK-NEXT: %185 = arith.mulf %184, %183 : f32 // CHECK-NEXT: %h_x = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %45 = arith.constant -2 : i64 -// CHECK-NEXT: %46 = "math.fpowi"(%h_x, %45) : (f32, i64) -> f32 -// CHECK-NEXT: %47 = arith.constant -1 : index -// CHECK-NEXT: %48 = arith.addi %27, %47 : index -// CHECK-NEXT: %49 = memref.load %u_t0_loadview[%48, %28, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %50 = arith.mulf %46, %49 : f32 -// CHECK-NEXT: %h_x_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %51 = arith.constant -2 : i64 -// CHECK-NEXT: %52 = "math.fpowi"(%h_x_1, %51) : (f32, i64) -> f32 -// CHECK-NEXT: %53 = arith.constant 1 : index -// CHECK-NEXT: %54 = arith.addi %27, %53 : index -// CHECK-NEXT: %55 = memref.load %u_t0_loadview[%54, %28, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 
22262>> -// CHECK-NEXT: %56 = arith.mulf %52, %55 : f32 -// CHECK-NEXT: %57 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %58 = arith.constant -2 : i64 -// CHECK-NEXT: %59 = "math.fpowi"(%h_x_2, %58) : (f32, i64) -> f32 -// CHECK-NEXT: %60 = memref.load %u_t0_loadview[%27, %28, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %61 = arith.mulf %57, %59 : f32 -// CHECK-NEXT: %62 = arith.mulf %61, %60 : f32 -// CHECK-NEXT: %63 = arith.addf %50, %56 : f32 -// CHECK-NEXT: %64 = arith.addf %63, %62 : f32 -// CHECK-NEXT: %h_y = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %65 = arith.constant -2 : i64 -// CHECK-NEXT: %66 = "math.fpowi"(%h_y, %65) : (f32, i64) -> f32 -// CHECK-NEXT: %67 = arith.constant -1 : index -// CHECK-NEXT: %68 = arith.addi %28, %67 : index -// CHECK-NEXT: %69 = memref.load %u_t0_loadview[%27, %68, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %70 = arith.mulf %66, %69 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %71 = arith.constant -2 : i64 -// CHECK-NEXT: %72 = "math.fpowi"(%h_y_1, %71) : (f32, i64) -> f32 -// CHECK-NEXT: %73 = arith.constant 1 : index -// CHECK-NEXT: %74 = arith.addi %28, %73 : index -// CHECK-NEXT: %75 = memref.load %u_t0_loadview[%27, %74, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %76 = arith.mulf %72, %75 : f32 -// CHECK-NEXT: %77 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %78 = arith.constant -2 : i64 -// CHECK-NEXT: %79 = "math.fpowi"(%h_y_2, %78) : (f32, i64) -> f32 -// CHECK-NEXT: %80 = memref.load %u_t0_loadview[%27, %28, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %81 = arith.mulf %77, %79 : f32 -// CHECK-NEXT: %82 = arith.mulf %81, %80 : f32 -// CHECK-NEXT: %83 = arith.addf %70, %76 
: f32 -// CHECK-NEXT: %84 = arith.addf %83, %82 : f32 -// CHECK-NEXT: %h_z = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %85 = arith.constant -2 : i64 -// CHECK-NEXT: %86 = "math.fpowi"(%h_z, %85) : (f32, i64) -> f32 -// CHECK-NEXT: %87 = arith.constant -1 : index -// CHECK-NEXT: %88 = arith.addi %26, %87 : index -// CHECK-NEXT: %89 = memref.load %u_t0_loadview[%27, %28, %88] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %90 = arith.mulf %86, %89 : f32 -// CHECK-NEXT: %h_z_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %91 = arith.constant -2 : i64 -// CHECK-NEXT: %92 = "math.fpowi"(%h_z_1, %91) : (f32, i64) -> f32 -// CHECK-NEXT: %93 = arith.constant 1 : index -// CHECK-NEXT: %94 = arith.addi %26, %93 : index -// CHECK-NEXT: %95 = memref.load %u_t0_loadview[%27, %28, %94] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %96 = arith.mulf %92, %95 : f32 -// CHECK-NEXT: %97 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_z_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %98 = arith.constant -2 : i64 -// CHECK-NEXT: %99 = "math.fpowi"(%h_z_2, %98) : (f32, i64) -> f32 -// CHECK-NEXT: %100 = memref.load %u_t0_loadview[%27, %28, %26] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %101 = arith.mulf %97, %99 : f32 -// CHECK-NEXT: %102 = arith.mulf %101, %100 : f32 -// CHECK-NEXT: %103 = arith.addf %90, %96 : f32 -// CHECK-NEXT: %104 = arith.addf %103, %102 : f32 -// CHECK-NEXT: %105 = arith.addf %44, %64 : f32 -// CHECK-NEXT: %106 = arith.addf %105, %84 : f32 -// CHECK-NEXT: %107 = arith.addf %106, %104 : f32 -// CHECK-NEXT: %108 = arith.mulf %30, %107 : f32 -// CHECK-NEXT: memref.store %108, %u_t1_storeview[%27, %28, %26] : memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %186 = "math.fpowi"(%h_x, %175) : (f32, i64) -> f32 +// CHECK-NEXT: %187 = arith.constant -1 : index +// CHECK-NEXT: %188 = arith.addi %170, 
%187 : index +// CHECK-NEXT: %189 = memref.load %u_t0_blk[%188, %171, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %190 = arith.mulf %186, %189 : f32 +// CHECK-NEXT: %191 = arith.addi %170, %time_m : index +// CHECK-NEXT: %192 = memref.load %u_t0_blk[%191, %171, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %193 = arith.mulf %186, %192 : f32 +// CHECK-NEXT: %194 = arith.mulf %179, %186 : f32 +// CHECK-NEXT: %195 = arith.mulf %194, %180 : f32 +// CHECK-NEXT: %196 = arith.addf %190, %193 : f32 +// CHECK-NEXT: %197 = arith.addf %196, %195 : f32 +// CHECK-NEXT: %198 = arith.addi %171, %187 : index +// CHECK-NEXT: %199 = memref.load %u_t0_blk[%170, %198, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %200 = arith.mulf %186, %199 : f32 +// CHECK-NEXT: %201 = arith.addi %171, %time_m : index +// CHECK-NEXT: %202 = memref.load %u_t0_blk[%170, %201, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %203 = arith.mulf %186, %202 : f32 +// CHECK-NEXT: %204 = arith.addf %200, %203 : f32 +// CHECK-NEXT: %205 = arith.addf %204, %195 : f32 +// CHECK-NEXT: %206 = arith.addi %169, %187 : index +// CHECK-NEXT: %207 = memref.load %u_t0_blk[%170, %171, %206] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %208 = arith.mulf %186, %207 : f32 +// CHECK-NEXT: %209 = arith.addi %169, %time_m : index +// CHECK-NEXT: %210 = memref.load %u_t0_blk[%170, %171, %209] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %211 = arith.mulf %186, %210 : f32 +// CHECK-NEXT: %212 = arith.addf %208, %211 : f32 +// CHECK-NEXT: %213 = arith.addf %212, %195 : f32 +// CHECK-NEXT: %214 = arith.addf %185, %197 : f32 +// CHECK-NEXT: %215 = arith.addf %214, %205 : f32 +// CHECK-NEXT: %216 = arith.addf %215, %213 : f32 +// CHECK-NEXT: %217 = arith.mulf %173, %216 : f32 +// CHECK-NEXT: 
memref.store %217, %157[%170, %171, %169] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index, index, index, index) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index) -> () -// CHECK-NEXT: %u_t1_temp = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32> // CHECK-NEXT: } -// CHECK-NEXT: %109 = func.call @timer_end(%0) : (f64) -> f64 -// CHECK-NEXT: "llvm.store"(%109, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () +// CHECK-NEXT: %218 = func.call @timer_end(%0) : (f64) -> f64 +// CHECK-NEXT: "llvm.store"(%218, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () // CHECK-NEXT: func.return // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 // CHECK-NEXT: func.func private @timer_end(f64) -> f64 -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: } diff --git a/tests/filecheck/xdsl_mpi_pipeline_e.mlir b/tests/filecheck/xdsl_mpi_pipeline_e.mlir index 14e82658d0..d39ba2b16a 100644 --- a/tests/filecheck/xdsl_mpi_pipeline_e.mlir +++ b/tests/filecheck/xdsl_mpi_pipeline_e.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0},dmp-to-mpi{mpi_init=false},lower-mpi" %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,distribute-stencil{strategy=3d-grid slices=2,1,1 
restrict_domain=false},shape-inference,canonicalize-dmp,stencil-bufferize,dmp-to-mpi{mpi_init=false},convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,64,0},dmp-to-mpi{mpi_init=false},lower-mpi,canonicalize,cse" %s | filecheck %s builtin.module { func.func @Kernel(%u_vec0 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec1 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %u_vec2 : !stencil.field<[-2,53]x[-2,103]x[-2,103]xf32>, %timers : !llvm.ptr) { @@ -110,586 +110,429 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 // CHECK-NEXT: %time_m = arith.constant 1 : index // CHECK-NEXT: %time_M = arith.constant 20 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index -// CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3 = arith.constant 12 : i32 -// CHECK-NEXT: %4 = "llvm.alloca"(%3) <{"alignment" = 32 : i64, "elem_type" = i32}> : (i32) -> !llvm.ptr -// CHECK-NEXT: %5 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %6 = arith.constant 1 : i64 -// CHECK-NEXT: %7 = "llvm.alloca"(%6) <{"alignment" = 32 : i64, "elem_type" = i32}> : (i64) -> !llvm.ptr -// CHECK-NEXT: %8 = func.call @MPI_Comm_rank(%5, %7) : (i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %9 = "llvm.load"(%7) : (!llvm.ptr) -> i32 +// CHECK-NEXT: %1 = arith.addi %time_M, %time_m : index +// CHECK-NEXT: %2 = arith.constant 12 : i32 +// CHECK-NEXT: %3 = "llvm.alloca"(%2) <{"alignment" = 32 : i64, "elem_type" = i32}> : (i32) -> !llvm.ptr +// CHECK-NEXT: %4 = arith.constant 1140850688 : i32 +// CHECK-NEXT: %5 = arith.constant 1 : i64 +// CHECK-NEXT: %6 = "llvm.alloca"(%5) <{"alignment" = 32 : i64, "elem_type" = i32}> : (i64) -> !llvm.ptr +// CHECK-NEXT: %7 = func.call @MPI_Comm_rank(%4, %6) : (i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %8 = "llvm.load"(%6) : (!llvm.ptr) -> i32 // CHECK-NEXT: %send_buff_ex0 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> -// CHECK-NEXT: %10 = 
"memref.extract_aligned_pointer_as_index"(%send_buff_ex0) : (memref<101x101xf32>) -> index -// CHECK-NEXT: %11 = arith.index_cast %10 : index to i64 -// CHECK-NEXT: %send_buff_ex0_ptr = "llvm.inttoptr"(%11) : (i64) -> !llvm.ptr -// CHECK-NEXT: %12 = arith.constant 10201 : i32 -// CHECK-NEXT: %13 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %9 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex0) : (memref<101x101xf32>) -> index +// CHECK-NEXT: %10 = arith.index_cast %9 : index to i64 +// CHECK-NEXT: %send_buff_ex0_ptr = "llvm.inttoptr"(%10) : (i64) -> !llvm.ptr +// CHECK-NEXT: %11 = arith.constant 10201 : i32 +// CHECK-NEXT: %12 = arith.constant 1275069450 : i32 // CHECK-NEXT: %recv_buff_ex0 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> -// CHECK-NEXT: %14 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex0) : (memref<101x101xf32>) -> index -// CHECK-NEXT: %15 = arith.index_cast %14 : index to i64 -// CHECK-NEXT: %recv_buff_ex0_ptr = "llvm.inttoptr"(%15) : (i64) -> !llvm.ptr -// CHECK-NEXT: %16 = arith.constant 10201 : i32 -// CHECK-NEXT: %17 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %13 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex0) : (memref<101x101xf32>) -> index +// CHECK-NEXT: %14 = arith.index_cast %13 : index to i64 +// CHECK-NEXT: %recv_buff_ex0_ptr = "llvm.inttoptr"(%14) : (i64) -> !llvm.ptr // CHECK-NEXT: %send_buff_ex1 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> -// CHECK-NEXT: %18 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex1) : (memref<101x101xf32>) -> index -// CHECK-NEXT: %19 = arith.index_cast %18 : index to i64 -// CHECK-NEXT: %send_buff_ex1_ptr = "llvm.inttoptr"(%19) : (i64) -> !llvm.ptr -// CHECK-NEXT: %20 = arith.constant 10201 : i32 -// CHECK-NEXT: %21 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %15 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex1) : (memref<101x101xf32>) -> index +// CHECK-NEXT: %16 = arith.index_cast %15 : index to i64 
+// CHECK-NEXT: %send_buff_ex1_ptr = "llvm.inttoptr"(%16) : (i64) -> !llvm.ptr // CHECK-NEXT: %recv_buff_ex1 = memref.alloc() {"alignment" = 64 : i64} : memref<101x101xf32> -// CHECK-NEXT: %22 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex1) : (memref<101x101xf32>) -> index -// CHECK-NEXT: %23 = arith.index_cast %22 : index to i64 -// CHECK-NEXT: %recv_buff_ex1_ptr = "llvm.inttoptr"(%23) : (i64) -> !llvm.ptr -// CHECK-NEXT: %24 = arith.constant 10201 : i32 -// CHECK-NEXT: %25 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %17 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex1) : (memref<101x101xf32>) -> index +// CHECK-NEXT: %18 = arith.index_cast %17 : index to i64 +// CHECK-NEXT: %recv_buff_ex1_ptr = "llvm.inttoptr"(%18) : (i64) -> !llvm.ptr // CHECK-NEXT: %send_buff_ex2 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %26 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex2) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %27 = arith.index_cast %26 : index to i64 -// CHECK-NEXT: %send_buff_ex2_ptr = "llvm.inttoptr"(%27) : (i64) -> !llvm.ptr -// CHECK-NEXT: %28 = arith.constant 5151 : i32 -// CHECK-NEXT: %29 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %19 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex2) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %20 = arith.index_cast %19 : index to i64 +// CHECK-NEXT: %send_buff_ex2_ptr = "llvm.inttoptr"(%20) : (i64) -> !llvm.ptr +// CHECK-NEXT: %21 = arith.constant 5151 : i32 // CHECK-NEXT: %recv_buff_ex2 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %30 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex2) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %31 = arith.index_cast %30 : index to i64 -// CHECK-NEXT: %recv_buff_ex2_ptr = "llvm.inttoptr"(%31) : (i64) -> !llvm.ptr -// CHECK-NEXT: %32 = arith.constant 5151 : i32 -// CHECK-NEXT: %33 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %22 = 
"memref.extract_aligned_pointer_as_index"(%recv_buff_ex2) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %23 = arith.index_cast %22 : index to i64 +// CHECK-NEXT: %recv_buff_ex2_ptr = "llvm.inttoptr"(%23) : (i64) -> !llvm.ptr // CHECK-NEXT: %send_buff_ex3 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %34 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex3) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %35 = arith.index_cast %34 : index to i64 -// CHECK-NEXT: %send_buff_ex3_ptr = "llvm.inttoptr"(%35) : (i64) -> !llvm.ptr -// CHECK-NEXT: %36 = arith.constant 5151 : i32 -// CHECK-NEXT: %37 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %24 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex3) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %25 = arith.index_cast %24 : index to i64 +// CHECK-NEXT: %send_buff_ex3_ptr = "llvm.inttoptr"(%25) : (i64) -> !llvm.ptr // CHECK-NEXT: %recv_buff_ex3 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %38 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex3) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %39 = arith.index_cast %38 : index to i64 -// CHECK-NEXT: %recv_buff_ex3_ptr = "llvm.inttoptr"(%39) : (i64) -> !llvm.ptr -// CHECK-NEXT: %40 = arith.constant 5151 : i32 -// CHECK-NEXT: %41 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %26 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex3) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %27 = arith.index_cast %26 : index to i64 +// CHECK-NEXT: %recv_buff_ex3_ptr = "llvm.inttoptr"(%27) : (i64) -> !llvm.ptr // CHECK-NEXT: %send_buff_ex4 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %42 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex4) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %43 = arith.index_cast %42 : index to i64 -// CHECK-NEXT: %send_buff_ex4_ptr = "llvm.inttoptr"(%43) : (i64) -> !llvm.ptr -// CHECK-NEXT: %44 = arith.constant 5151 : 
i32 -// CHECK-NEXT: %45 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %28 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex4) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %29 = arith.index_cast %28 : index to i64 +// CHECK-NEXT: %send_buff_ex4_ptr = "llvm.inttoptr"(%29) : (i64) -> !llvm.ptr // CHECK-NEXT: %recv_buff_ex4 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %46 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex4) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %47 = arith.index_cast %46 : index to i64 -// CHECK-NEXT: %recv_buff_ex4_ptr = "llvm.inttoptr"(%47) : (i64) -> !llvm.ptr -// CHECK-NEXT: %48 = arith.constant 5151 : i32 -// CHECK-NEXT: %49 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %30 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex4) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %31 = arith.index_cast %30 : index to i64 +// CHECK-NEXT: %recv_buff_ex4_ptr = "llvm.inttoptr"(%31) : (i64) -> !llvm.ptr // CHECK-NEXT: %send_buff_ex5 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %50 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex5) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %51 = arith.index_cast %50 : index to i64 -// CHECK-NEXT: %send_buff_ex5_ptr = "llvm.inttoptr"(%51) : (i64) -> !llvm.ptr -// CHECK-NEXT: %52 = arith.constant 5151 : i32 -// CHECK-NEXT: %53 = arith.constant 1275069450 : i32 +// CHECK-NEXT: %32 = "memref.extract_aligned_pointer_as_index"(%send_buff_ex5) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %33 = arith.index_cast %32 : index to i64 +// CHECK-NEXT: %send_buff_ex5_ptr = "llvm.inttoptr"(%33) : (i64) -> !llvm.ptr // CHECK-NEXT: %recv_buff_ex5 = memref.alloc() {"alignment" = 64 : i64} : memref<51x101xf32> -// CHECK-NEXT: %54 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex5) : (memref<51x101xf32>) -> index -// CHECK-NEXT: %55 = arith.index_cast %54 : index to i64 -// CHECK-NEXT: %recv_buff_ex5_ptr = 
"llvm.inttoptr"(%55) : (i64) -> !llvm.ptr -// CHECK-NEXT: %56 = arith.constant 5151 : i32 -// CHECK-NEXT: %57 = arith.constant 1275069450 : i32 -// CHECK-NEXT: %58, %59, %60 = scf.for %time = %time_m to %2 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32>) { -// CHECK-NEXT: %u_t1_storeview = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %u_t0_loadview = "memref.subview"(%u_t0) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %61 = arith.constant 0 : i32 -// CHECK-NEXT: %62 = arith.constant 1 : i32 -// CHECK-NEXT: %63 = arith.divui %9, %62 : i32 -// CHECK-NEXT: %64 = arith.remui %9, %62 : i32 -// CHECK-NEXT: %65 = arith.constant 1 : i32 -// CHECK-NEXT: %66 = arith.divui %64, %65 : i32 -// CHECK-NEXT: %67 = arith.remui %64, %65 : i32 -// CHECK-NEXT: %68 = arith.constant 1 : i32 -// CHECK-NEXT: %69 = arith.divui %67, %68 : i32 -// CHECK-NEXT: %70 = arith.remui %67, %68 : i32 -// CHECK-NEXT: %71 = arith.constant 1 : i32 -// CHECK-NEXT: %72 = arith.addi %63, %71 : i32 -// CHECK-NEXT: %73 = arith.constant 2 : i32 -// CHECK-NEXT: %74 = arith.cmpi slt, %72, %73 : i32 -// CHECK-NEXT: %75 = arith.constant true -// CHECK-NEXT: %76 = arith.constant true -// CHECK-NEXT: %77 = arith.andi %74, %75 : i1 -// CHECK-NEXT: %78 = arith.andi %77, %76 : i1 -// CHECK-NEXT: %79 = arith.constant 1 : i32 -// CHECK-NEXT: %80 = arith.muli %79, %72 : i32 -// CHECK-NEXT: %81 = arith.addi %69, %80 : i32 -// CHECK-NEXT: %82 = arith.constant 1 : i32 -// CHECK-NEXT: %83 = arith.muli %82, %66 : i32 -// CHECK-NEXT: %84 = arith.addi %81, %83 : 
i32 -// CHECK-NEXT: %85 = arith.constant 0 : i32 -// CHECK-NEXT: %86 = arith.constant 6 : i32 -// CHECK-NEXT: %87 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %88 = arith.constant 4 : i64 -// CHECK-NEXT: %89 = arith.index_cast %85 : i32 to index -// CHECK-NEXT: %90 = arith.index_cast %89 : index to i64 -// CHECK-NEXT: %91 = arith.muli %88, %90 : i64 -// CHECK-NEXT: %92 = arith.addi %91, %87 : i64 -// CHECK-NEXT: %93 = "llvm.inttoptr"(%92) : (i64) -> !llvm.ptr -// CHECK-NEXT: %94 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %95 = arith.constant 4 : i64 -// CHECK-NEXT: %96 = arith.index_cast %86 : i32 to index -// CHECK-NEXT: %97 = arith.index_cast %96 : index to i64 -// CHECK-NEXT: %98 = arith.muli %95, %97 : i64 -// CHECK-NEXT: %99 = arith.addi %98, %94 : i64 -// CHECK-NEXT: %100 = "llvm.inttoptr"(%99) : (i64) -> !llvm.ptr -// CHECK-NEXT: "scf.if"(%78) ({ -// CHECK-NEXT: %101 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<101x101xf32, strided<[105, 1], offset: 573512>> -// CHECK-NEXT: "memref.copy"(%101, %send_buff_ex0) : (memref<101x101xf32, strided<[105, 1], offset: 573512>>, memref<101x101xf32>) -> () -// CHECK-NEXT: %102 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %103 = func.call @MPI_Isend(%send_buff_ex0_ptr, %12, %13, %84, %61, %102, %93) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %104 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %105 = func.call @MPI_Irecv(%recv_buff_ex0_ptr, %16, %17, %84, %61, %104, %100) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %34 = "memref.extract_aligned_pointer_as_index"(%recv_buff_ex5) : (memref<51x101xf32>) -> index +// CHECK-NEXT: %35 = arith.index_cast %34 : index to i64 +// CHECK-NEXT: %recv_buff_ex5_ptr = "llvm.inttoptr"(%35) : (i64) -> !llvm.ptr +// 
CHECK-NEXT: %36, %37, %38 = scf.for %time = %time_m to %1 step %time_m iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1, %u_t2 = %u_vec2) -> (memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32>) { +// CHECK-NEXT: %39 = arith.constant 0 : i32 +// CHECK-NEXT: %40 = arith.constant 1 : i32 +// CHECK-NEXT: %41 = arith.divui %8, %40 : i32 +// CHECK-NEXT: %42 = arith.remui %8, %40 : i32 +// CHECK-NEXT: %43 = arith.divui %42, %40 : i32 +// CHECK-NEXT: %44 = arith.remui %42, %40 : i32 +// CHECK-NEXT: %45 = arith.divui %44, %40 : i32 +// CHECK-NEXT: %46 = arith.remui %44, %40 : i32 +// CHECK-NEXT: %47 = arith.addi %41, %40 : i32 +// CHECK-NEXT: %48 = arith.constant 2 : i32 +// CHECK-NEXT: %49 = arith.cmpi slt, %47, %48 : i32 +// CHECK-NEXT: %50 = arith.constant true +// CHECK-NEXT: %51 = arith.andi %49, %50 : i1 +// CHECK-NEXT: %52 = arith.andi %51, %50 : i1 +// CHECK-NEXT: %53 = arith.muli %40, %47 : i32 +// CHECK-NEXT: %54 = arith.addi %45, %53 : i32 +// CHECK-NEXT: %55 = arith.muli %40, %43 : i32 +// CHECK-NEXT: %56 = arith.addi %54, %55 : i32 +// CHECK-NEXT: %57 = arith.constant 6 : i32 +// CHECK-NEXT: %58 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %59 = arith.constant 4 : i64 +// CHECK-NEXT: %60 = arith.index_cast %39 : i32 to index +// CHECK-NEXT: %61 = arith.index_cast %60 : index to i64 +// CHECK-NEXT: %62 = arith.muli %59, %61 : i64 +// CHECK-NEXT: %63 = arith.addi %62, %58 : i64 +// CHECK-NEXT: %64 = "llvm.inttoptr"(%63) : (i64) -> !llvm.ptr +// CHECK-NEXT: %65 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %66 = arith.index_cast %57 : i32 to index +// CHECK-NEXT: %67 = arith.index_cast %66 : index to i64 +// CHECK-NEXT: %68 = arith.muli %59, %67 : i64 +// CHECK-NEXT: %69 = arith.addi %68, %65 : i64 +// CHECK-NEXT: %70 = "llvm.inttoptr"(%69) : (i64) -> !llvm.ptr +// CHECK-NEXT: "scf.if"(%52) ({ +// CHECK-NEXT: %71 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: 
%72 = memref.subview %71[52, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 573512>> +// CHECK-NEXT: "memref.copy"(%72, %send_buff_ex0) : (memref<101x101xf32, strided<[105, 1], offset: 573512>>, memref<101x101xf32>) -> () +// CHECK-NEXT: %73 = func.call @MPI_Isend(%send_buff_ex0_ptr, %11, %12, %56, %39, %4, %64) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %74 = func.call @MPI_Irecv(%recv_buff_ex0_ptr, %11, %12, %56, %39, %4, %70) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { -// CHECK-NEXT: %106 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%106, %93) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () -// CHECK-NEXT: %107 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%107, %100) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: %75 = arith.constant 738197504 : i32 +// CHECK-NEXT: "llvm.store"(%75, %64) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: "llvm.store"(%75, %70) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %108 = arith.constant 1 : i32 -// CHECK-NEXT: %109 = arith.divui %9, %108 : i32 -// CHECK-NEXT: %110 = arith.remui %9, %108 : i32 -// CHECK-NEXT: %111 = arith.constant 1 : i32 -// CHECK-NEXT: %112 = arith.divui %110, %111 : i32 -// CHECK-NEXT: %113 = arith.remui %110, %111 : i32 -// CHECK-NEXT: %114 = arith.constant 1 : i32 -// CHECK-NEXT: %115 = arith.divui %113, %114 : i32 -// CHECK-NEXT: %116 = arith.remui %113, %114 : i32 -// CHECK-NEXT: %117 = arith.constant -1 : i32 -// CHECK-NEXT: %118 = arith.addi %109, %117 : i32 -// CHECK-NEXT: %119 = arith.constant 0 : i32 -// CHECK-NEXT: %120 = arith.cmpi sge, %118, %119 : i32 -// CHECK-NEXT: %121 = arith.constant true -// CHECK-NEXT: %122 = arith.constant true -// CHECK-NEXT: %123 = arith.andi %120, %121 : i1 -// CHECK-NEXT: 
%124 = arith.andi %123, %122 : i1 -// CHECK-NEXT: %125 = arith.constant 1 : i32 -// CHECK-NEXT: %126 = arith.muli %125, %118 : i32 -// CHECK-NEXT: %127 = arith.addi %115, %126 : i32 -// CHECK-NEXT: %128 = arith.constant 1 : i32 -// CHECK-NEXT: %129 = arith.muli %128, %112 : i32 -// CHECK-NEXT: %130 = arith.addi %127, %129 : i32 -// CHECK-NEXT: %131 = arith.constant 1 : i32 -// CHECK-NEXT: %132 = arith.constant 7 : i32 -// CHECK-NEXT: %133 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %134 = arith.constant 4 : i64 -// CHECK-NEXT: %135 = arith.index_cast %131 : i32 to index -// CHECK-NEXT: %136 = arith.index_cast %135 : index to i64 -// CHECK-NEXT: %137 = arith.muli %134, %136 : i64 -// CHECK-NEXT: %138 = arith.addi %137, %133 : i64 -// CHECK-NEXT: %139 = "llvm.inttoptr"(%138) : (i64) -> !llvm.ptr -// CHECK-NEXT: %140 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %141 = arith.constant 4 : i64 -// CHECK-NEXT: %142 = arith.index_cast %132 : i32 to index -// CHECK-NEXT: %143 = arith.index_cast %142 : index to i64 -// CHECK-NEXT: %144 = arith.muli %141, %143 : i64 -// CHECK-NEXT: %145 = arith.addi %144, %140 : i64 -// CHECK-NEXT: %146 = "llvm.inttoptr"(%145) : (i64) -> !llvm.ptr -// CHECK-NEXT: "scf.if"(%124) ({ -// CHECK-NEXT: %147 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<101x101xf32, strided<[105, 1], offset: 22262>> -// CHECK-NEXT: "memref.copy"(%147, %send_buff_ex1) : (memref<101x101xf32, strided<[105, 1], offset: 22262>>, memref<101x101xf32>) -> () -// CHECK-NEXT: %148 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %149 = func.call @MPI_Isend(%send_buff_ex1_ptr, %20, %21, %130, %61, %148, %139) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %150 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %151 = func.call 
@MPI_Irecv(%recv_buff_ex1_ptr, %24, %25, %130, %61, %150, %146) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %76 = arith.remui %8, %40 : i32 +// CHECK-NEXT: %77 = arith.divui %76, %40 : i32 +// CHECK-NEXT: %78 = arith.remui %76, %40 : i32 +// CHECK-NEXT: %79 = arith.divui %78, %40 : i32 +// CHECK-NEXT: %80 = arith.remui %78, %40 : i32 +// CHECK-NEXT: %81 = arith.constant -1 : i32 +// CHECK-NEXT: %82 = arith.addi %41, %81 : i32 +// CHECK-NEXT: %83 = arith.cmpi sge, %82, %39 : i32 +// CHECK-NEXT: %84 = arith.andi %83, %50 : i1 +// CHECK-NEXT: %85 = arith.andi %84, %50 : i1 +// CHECK-NEXT: %86 = arith.muli %40, %82 : i32 +// CHECK-NEXT: %87 = arith.addi %79, %86 : i32 +// CHECK-NEXT: %88 = arith.muli %40, %77 : i32 +// CHECK-NEXT: %89 = arith.addi %87, %88 : i32 +// CHECK-NEXT: %90 = arith.constant 7 : i32 +// CHECK-NEXT: %91 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %92 = arith.index_cast %40 : i32 to index +// CHECK-NEXT: %93 = arith.index_cast %92 : index to i64 +// CHECK-NEXT: %94 = arith.muli %59, %93 : i64 +// CHECK-NEXT: %95 = arith.addi %94, %91 : i64 +// CHECK-NEXT: %96 = "llvm.inttoptr"(%95) : (i64) -> !llvm.ptr +// CHECK-NEXT: %97 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %98 = arith.index_cast %90 : i32 to index +// CHECK-NEXT: %99 = arith.index_cast %98 : index to i64 +// CHECK-NEXT: %100 = arith.muli %59, %99 : i64 +// CHECK-NEXT: %101 = arith.addi %100, %97 : i64 +// CHECK-NEXT: %102 = "llvm.inttoptr"(%101) : (i64) -> !llvm.ptr +// CHECK-NEXT: "scf.if"(%85) ({ +// CHECK-NEXT: %103 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %104 = memref.subview %103[2, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%104, %send_buff_ex1) : (memref<101x101xf32, strided<[105, 1], offset: 22262>>, memref<101x101xf32>) -> () +// CHECK-NEXT: %105 = 
func.call @MPI_Isend(%send_buff_ex1_ptr, %11, %12, %89, %39, %4, %96) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %106 = func.call @MPI_Irecv(%recv_buff_ex1_ptr, %11, %12, %89, %39, %4, %102) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { -// CHECK-NEXT: %152 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%152, %139) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () -// CHECK-NEXT: %153 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%153, %146) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: %107 = arith.constant 738197504 : i32 +// CHECK-NEXT: "llvm.store"(%107, %96) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: "llvm.store"(%107, %102) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %154 = arith.constant 1 : i32 -// CHECK-NEXT: %155 = arith.divui %9, %154 : i32 -// CHECK-NEXT: %156 = arith.remui %9, %154 : i32 -// CHECK-NEXT: %157 = arith.constant 1 : i32 -// CHECK-NEXT: %158 = arith.divui %156, %157 : i32 -// CHECK-NEXT: %159 = arith.remui %156, %157 : i32 -// CHECK-NEXT: %160 = arith.constant 1 : i32 -// CHECK-NEXT: %161 = arith.divui %159, %160 : i32 -// CHECK-NEXT: %162 = arith.remui %159, %160 : i32 -// CHECK-NEXT: %163 = arith.constant true -// CHECK-NEXT: %164 = arith.constant 1 : i32 -// CHECK-NEXT: %165 = arith.addi %158, %164 : i32 -// CHECK-NEXT: %166 = arith.constant 1 : i32 -// CHECK-NEXT: %167 = arith.cmpi slt, %165, %166 : i32 -// CHECK-NEXT: %168 = arith.constant true -// CHECK-NEXT: %169 = arith.andi %163, %167 : i1 -// CHECK-NEXT: %170 = arith.andi %169, %168 : i1 -// CHECK-NEXT: %171 = arith.constant 1 : i32 -// CHECK-NEXT: %172 = arith.muli %171, %155 : i32 -// CHECK-NEXT: %173 = arith.addi %161, %172 : i32 -// CHECK-NEXT: %174 = arith.constant 1 : i32 -// CHECK-NEXT: %175 = arith.muli %174, %165 : i32 -// CHECK-NEXT: 
%176 = arith.addi %173, %175 : i32 -// CHECK-NEXT: %177 = arith.constant 2 : i32 -// CHECK-NEXT: %178 = arith.constant 8 : i32 -// CHECK-NEXT: %179 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %180 = arith.constant 4 : i64 -// CHECK-NEXT: %181 = arith.index_cast %177 : i32 to index -// CHECK-NEXT: %182 = arith.index_cast %181 : index to i64 -// CHECK-NEXT: %183 = arith.muli %180, %182 : i64 -// CHECK-NEXT: %184 = arith.addi %183, %179 : i64 -// CHECK-NEXT: %185 = "llvm.inttoptr"(%184) : (i64) -> !llvm.ptr -// CHECK-NEXT: %186 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %187 = arith.constant 4 : i64 -// CHECK-NEXT: %188 = arith.index_cast %178 : i32 to index -// CHECK-NEXT: %189 = arith.index_cast %188 : index to i64 -// CHECK-NEXT: %190 = arith.muli %187, %189 : i64 -// CHECK-NEXT: %191 = arith.addi %190, %186 : i64 -// CHECK-NEXT: %192 = "llvm.inttoptr"(%191) : (i64) -> !llvm.ptr -// CHECK-NEXT: "scf.if"(%170) ({ -// CHECK-NEXT: %193 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 1], offset: 32762>> -// CHECK-NEXT: "memref.copy"(%193, %send_buff_ex2) : (memref<51x101xf32, strided<[11025, 1], offset: 32762>>, memref<51x101xf32>) -> () -// CHECK-NEXT: %194 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %195 = func.call @MPI_Isend(%send_buff_ex2_ptr, %28, %29, %176, %61, %194, %185) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %196 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %197 = func.call @MPI_Irecv(%recv_buff_ex2_ptr, %32, %33, %176, %61, %196, %192) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %108 = arith.remui %8, %40 : i32 +// CHECK-NEXT: %109 = arith.divui %108, %40 : i32 +// CHECK-NEXT: %110 = arith.remui %108, %40 : i32 +// CHECK-NEXT: %111 = arith.divui %110, %40 
: i32 +// CHECK-NEXT: %112 = arith.remui %110, %40 : i32 +// CHECK-NEXT: %113 = arith.addi %109, %40 : i32 +// CHECK-NEXT: %114 = arith.cmpi slt, %113, %40 : i32 +// CHECK-NEXT: %115 = arith.andi %50, %114 : i1 +// CHECK-NEXT: %116 = arith.andi %115, %50 : i1 +// CHECK-NEXT: %117 = arith.muli %40, %41 : i32 +// CHECK-NEXT: %118 = arith.addi %111, %117 : i32 +// CHECK-NEXT: %119 = arith.muli %40, %113 : i32 +// CHECK-NEXT: %120 = arith.addi %118, %119 : i32 +// CHECK-NEXT: %121 = arith.constant 8 : i32 +// CHECK-NEXT: %122 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %123 = arith.index_cast %48 : i32 to index +// CHECK-NEXT: %124 = arith.index_cast %123 : index to i64 +// CHECK-NEXT: %125 = arith.muli %59, %124 : i64 +// CHECK-NEXT: %126 = arith.addi %125, %122 : i64 +// CHECK-NEXT: %127 = "llvm.inttoptr"(%126) : (i64) -> !llvm.ptr +// CHECK-NEXT: %128 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %129 = arith.index_cast %121 : i32 to index +// CHECK-NEXT: %130 = arith.index_cast %129 : index to i64 +// CHECK-NEXT: %131 = arith.muli %59, %130 : i64 +// CHECK-NEXT: %132 = arith.addi %131, %128 : i64 +// CHECK-NEXT: %133 = "llvm.inttoptr"(%132) : (i64) -> !llvm.ptr +// CHECK-NEXT: "scf.if"(%116) ({ +// CHECK-NEXT: %134 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %135 = memref.subview %134[2, 102, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 32762>> +// CHECK-NEXT: "memref.copy"(%135, %send_buff_ex2) : (memref<51x101xf32, strided<[11025, 1], offset: 32762>>, memref<51x101xf32>) -> () +// CHECK-NEXT: %136 = func.call @MPI_Isend(%send_buff_ex2_ptr, %21, %12, %120, %39, %4, %127) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %137 = func.call @MPI_Irecv(%recv_buff_ex2_ptr, %21, %12, %120, %39, %4, %133) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: scf.yield // CHECK-NEXT: 
}, { -// CHECK-NEXT: %198 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%198, %185) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () -// CHECK-NEXT: %199 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%199, %192) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: %138 = arith.constant 738197504 : i32 +// CHECK-NEXT: "llvm.store"(%138, %127) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: "llvm.store"(%138, %133) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %200 = arith.constant 1 : i32 -// CHECK-NEXT: %201 = arith.divui %9, %200 : i32 -// CHECK-NEXT: %202 = arith.remui %9, %200 : i32 -// CHECK-NEXT: %203 = arith.constant 1 : i32 -// CHECK-NEXT: %204 = arith.divui %202, %203 : i32 -// CHECK-NEXT: %205 = arith.remui %202, %203 : i32 -// CHECK-NEXT: %206 = arith.constant 1 : i32 -// CHECK-NEXT: %207 = arith.divui %205, %206 : i32 -// CHECK-NEXT: %208 = arith.remui %205, %206 : i32 -// CHECK-NEXT: %209 = arith.constant true -// CHECK-NEXT: %210 = arith.constant -1 : i32 -// CHECK-NEXT: %211 = arith.addi %204, %210 : i32 -// CHECK-NEXT: %212 = arith.constant 0 : i32 -// CHECK-NEXT: %213 = arith.cmpi sge, %211, %212 : i32 -// CHECK-NEXT: %214 = arith.constant true -// CHECK-NEXT: %215 = arith.andi %209, %213 : i1 -// CHECK-NEXT: %216 = arith.andi %215, %214 : i1 -// CHECK-NEXT: %217 = arith.constant 1 : i32 -// CHECK-NEXT: %218 = arith.muli %217, %201 : i32 -// CHECK-NEXT: %219 = arith.addi %207, %218 : i32 -// CHECK-NEXT: %220 = arith.constant 1 : i32 -// CHECK-NEXT: %221 = arith.muli %220, %211 : i32 -// CHECK-NEXT: %222 = arith.addi %219, %221 : i32 -// CHECK-NEXT: %223 = arith.constant 3 : i32 -// CHECK-NEXT: %224 = arith.constant 9 : i32 -// CHECK-NEXT: %225 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %226 = arith.constant 4 : i64 -// CHECK-NEXT: %227 = arith.index_cast %223 : i32 to index -// CHECK-NEXT: %228 = 
arith.index_cast %227 : index to i64 -// CHECK-NEXT: %229 = arith.muli %226, %228 : i64 -// CHECK-NEXT: %230 = arith.addi %229, %225 : i64 -// CHECK-NEXT: %231 = "llvm.inttoptr"(%230) : (i64) -> !llvm.ptr -// CHECK-NEXT: %232 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %233 = arith.constant 4 : i64 -// CHECK-NEXT: %234 = arith.index_cast %224 : i32 to index -// CHECK-NEXT: %235 = arith.index_cast %234 : index to i64 -// CHECK-NEXT: %236 = arith.muli %233, %235 : i64 -// CHECK-NEXT: %237 = arith.addi %236, %232 : i64 -// CHECK-NEXT: %238 = "llvm.inttoptr"(%237) : (i64) -> !llvm.ptr -// CHECK-NEXT: "scf.if"(%216) ({ -// CHECK-NEXT: %239 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 1], offset: 22262>> -// CHECK-NEXT: "memref.copy"(%239, %send_buff_ex3) : (memref<51x101xf32, strided<[11025, 1], offset: 22262>>, memref<51x101xf32>) -> () -// CHECK-NEXT: %240 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %241 = func.call @MPI_Isend(%send_buff_ex3_ptr, %36, %37, %222, %61, %240, %231) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %242 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %243 = func.call @MPI_Irecv(%recv_buff_ex3_ptr, %40, %41, %222, %61, %242, %238) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %139 = arith.remui %8, %40 : i32 +// CHECK-NEXT: %140 = arith.divui %139, %40 : i32 +// CHECK-NEXT: %141 = arith.remui %139, %40 : i32 +// CHECK-NEXT: %142 = arith.divui %141, %40 : i32 +// CHECK-NEXT: %143 = arith.remui %141, %40 : i32 +// CHECK-NEXT: %144 = arith.addi %140, %81 : i32 +// CHECK-NEXT: %145 = arith.cmpi sge, %144, %39 : i32 +// CHECK-NEXT: %146 = arith.andi %50, %145 : i1 +// CHECK-NEXT: %147 = arith.andi %146, %50 : i1 +// CHECK-NEXT: %148 = arith.addi %142, %117 : i32 +// 
CHECK-NEXT: %149 = arith.muli %40, %144 : i32 +// CHECK-NEXT: %150 = arith.addi %148, %149 : i32 +// CHECK-NEXT: %151 = arith.constant 3 : i32 +// CHECK-NEXT: %152 = arith.constant 9 : i32 +// CHECK-NEXT: %153 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %154 = arith.index_cast %151 : i32 to index +// CHECK-NEXT: %155 = arith.index_cast %154 : index to i64 +// CHECK-NEXT: %156 = arith.muli %59, %155 : i64 +// CHECK-NEXT: %157 = arith.addi %156, %153 : i64 +// CHECK-NEXT: %158 = "llvm.inttoptr"(%157) : (i64) -> !llvm.ptr +// CHECK-NEXT: %159 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %160 = arith.index_cast %152 : i32 to index +// CHECK-NEXT: %161 = arith.index_cast %160 : index to i64 +// CHECK-NEXT: %162 = arith.muli %59, %161 : i64 +// CHECK-NEXT: %163 = arith.addi %162, %159 : i64 +// CHECK-NEXT: %164 = "llvm.inttoptr"(%163) : (i64) -> !llvm.ptr +// CHECK-NEXT: "scf.if"(%147) ({ +// CHECK-NEXT: %165 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %166 = memref.subview %165[2, 2, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%166, %send_buff_ex3) : (memref<51x101xf32, strided<[11025, 1], offset: 22262>>, memref<51x101xf32>) -> () +// CHECK-NEXT: %167 = func.call @MPI_Isend(%send_buff_ex3_ptr, %21, %12, %150, %39, %4, %158) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %168 = func.call @MPI_Irecv(%recv_buff_ex3_ptr, %21, %12, %150, %39, %4, %164) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { -// CHECK-NEXT: %244 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%244, %231) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () -// CHECK-NEXT: %245 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%245, %238) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: %169 = 
arith.constant 738197504 : i32 +// CHECK-NEXT: "llvm.store"(%169, %158) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: "llvm.store"(%169, %164) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %246 = arith.constant 1 : i32 -// CHECK-NEXT: %247 = arith.divui %9, %246 : i32 -// CHECK-NEXT: %248 = arith.remui %9, %246 : i32 -// CHECK-NEXT: %249 = arith.constant 1 : i32 -// CHECK-NEXT: %250 = arith.divui %248, %249 : i32 -// CHECK-NEXT: %251 = arith.remui %248, %249 : i32 -// CHECK-NEXT: %252 = arith.constant 1 : i32 -// CHECK-NEXT: %253 = arith.divui %251, %252 : i32 -// CHECK-NEXT: %254 = arith.remui %251, %252 : i32 -// CHECK-NEXT: %255 = arith.constant true -// CHECK-NEXT: %256 = arith.constant true -// CHECK-NEXT: %257 = arith.constant 1 : i32 -// CHECK-NEXT: %258 = arith.addi %253, %257 : i32 -// CHECK-NEXT: %259 = arith.constant 1 : i32 -// CHECK-NEXT: %260 = arith.cmpi slt, %258, %259 : i32 -// CHECK-NEXT: %261 = arith.andi %255, %256 : i1 -// CHECK-NEXT: %262 = arith.andi %261, %260 : i1 -// CHECK-NEXT: %263 = arith.constant 1 : i32 -// CHECK-NEXT: %264 = arith.muli %263, %247 : i32 -// CHECK-NEXT: %265 = arith.addi %258, %264 : i32 -// CHECK-NEXT: %266 = arith.constant 1 : i32 -// CHECK-NEXT: %267 = arith.muli %266, %250 : i32 -// CHECK-NEXT: %268 = arith.addi %265, %267 : i32 -// CHECK-NEXT: %269 = arith.constant 4 : i32 -// CHECK-NEXT: %270 = arith.constant 10 : i32 -// CHECK-NEXT: %271 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %272 = arith.constant 4 : i64 -// CHECK-NEXT: %273 = arith.index_cast %269 : i32 to index -// CHECK-NEXT: %274 = arith.index_cast %273 : index to i64 -// CHECK-NEXT: %275 = arith.muli %272, %274 : i64 -// CHECK-NEXT: %276 = arith.addi %275, %271 : i64 -// CHECK-NEXT: %277 = "llvm.inttoptr"(%276) : (i64) -> !llvm.ptr -// CHECK-NEXT: %278 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %279 = arith.constant 4 : i64 
-// CHECK-NEXT: %280 = arith.index_cast %270 : i32 to index -// CHECK-NEXT: %281 = arith.index_cast %280 : index to i64 -// CHECK-NEXT: %282 = arith.muli %279, %281 : i64 -// CHECK-NEXT: %283 = arith.addi %282, %278 : i64 -// CHECK-NEXT: %284 = "llvm.inttoptr"(%283) : (i64) -> !llvm.ptr -// CHECK-NEXT: "scf.if"(%262) ({ -// CHECK-NEXT: %285 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 105], offset: 22362>> -// CHECK-NEXT: "memref.copy"(%285, %send_buff_ex4) : (memref<51x101xf32, strided<[11025, 105], offset: 22362>>, memref<51x101xf32>) -> () -// CHECK-NEXT: %286 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %287 = func.call @MPI_Isend(%send_buff_ex4_ptr, %44, %45, %268, %61, %286, %277) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %288 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %289 = func.call @MPI_Irecv(%recv_buff_ex4_ptr, %48, %49, %268, %61, %288, %284) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %170 = arith.remui %8, %40 : i32 +// CHECK-NEXT: %171 = arith.divui %170, %40 : i32 +// CHECK-NEXT: %172 = arith.remui %170, %40 : i32 +// CHECK-NEXT: %173 = arith.divui %172, %40 : i32 +// CHECK-NEXT: %174 = arith.remui %172, %40 : i32 +// CHECK-NEXT: %175 = arith.addi %173, %40 : i32 +// CHECK-NEXT: %176 = arith.cmpi slt, %175, %40 : i32 +// CHECK-NEXT: %177 = arith.andi %50, %50 : i1 +// CHECK-NEXT: %178 = arith.andi %177, %176 : i1 +// CHECK-NEXT: %179 = arith.addi %175, %117 : i32 +// CHECK-NEXT: %180 = arith.muli %40, %171 : i32 +// CHECK-NEXT: %181 = arith.addi %179, %180 : i32 +// CHECK-NEXT: %182 = arith.constant 4 : i32 +// CHECK-NEXT: %183 = arith.constant 10 : i32 +// CHECK-NEXT: %184 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %185 = arith.index_cast %182 : i32 to index 
+// CHECK-NEXT: %186 = arith.index_cast %185 : index to i64 +// CHECK-NEXT: %187 = arith.muli %59, %186 : i64 +// CHECK-NEXT: %188 = arith.addi %187, %184 : i64 +// CHECK-NEXT: %189 = "llvm.inttoptr"(%188) : (i64) -> !llvm.ptr +// CHECK-NEXT: %190 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %191 = arith.index_cast %183 : i32 to index +// CHECK-NEXT: %192 = arith.index_cast %191 : index to i64 +// CHECK-NEXT: %193 = arith.muli %59, %192 : i64 +// CHECK-NEXT: %194 = arith.addi %193, %190 : i64 +// CHECK-NEXT: %195 = "llvm.inttoptr"(%194) : (i64) -> !llvm.ptr +// CHECK-NEXT: "scf.if"(%178) ({ +// CHECK-NEXT: %196 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %197 = memref.subview %196[2, 2, 102] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22362>> +// CHECK-NEXT: "memref.copy"(%197, %send_buff_ex4) : (memref<51x101xf32, strided<[11025, 105], offset: 22362>>, memref<51x101xf32>) -> () +// CHECK-NEXT: %198 = func.call @MPI_Isend(%send_buff_ex4_ptr, %21, %12, %181, %39, %4, %189) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %199 = func.call @MPI_Irecv(%recv_buff_ex4_ptr, %21, %12, %181, %39, %4, %195) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { -// CHECK-NEXT: %290 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%290, %277) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () -// CHECK-NEXT: %291 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%291, %284) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: %200 = arith.constant 738197504 : i32 +// CHECK-NEXT: "llvm.store"(%200, %189) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: "llvm.store"(%200, %195) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %292 = arith.constant 1 
: i32 -// CHECK-NEXT: %293 = arith.divui %9, %292 : i32 -// CHECK-NEXT: %294 = arith.remui %9, %292 : i32 -// CHECK-NEXT: %295 = arith.constant 1 : i32 -// CHECK-NEXT: %296 = arith.divui %294, %295 : i32 -// CHECK-NEXT: %297 = arith.remui %294, %295 : i32 -// CHECK-NEXT: %298 = arith.constant 1 : i32 -// CHECK-NEXT: %299 = arith.divui %297, %298 : i32 -// CHECK-NEXT: %300 = arith.remui %297, %298 : i32 -// CHECK-NEXT: %301 = arith.constant true -// CHECK-NEXT: %302 = arith.constant true -// CHECK-NEXT: %303 = arith.constant -1 : i32 -// CHECK-NEXT: %304 = arith.addi %299, %303 : i32 -// CHECK-NEXT: %305 = arith.constant 0 : i32 -// CHECK-NEXT: %306 = arith.cmpi sge, %304, %305 : i32 -// CHECK-NEXT: %307 = arith.andi %301, %302 : i1 -// CHECK-NEXT: %308 = arith.andi %307, %306 : i1 -// CHECK-NEXT: %309 = arith.constant 1 : i32 -// CHECK-NEXT: %310 = arith.muli %309, %293 : i32 -// CHECK-NEXT: %311 = arith.addi %304, %310 : i32 -// CHECK-NEXT: %312 = arith.constant 1 : i32 -// CHECK-NEXT: %313 = arith.muli %312, %296 : i32 -// CHECK-NEXT: %314 = arith.addi %311, %313 : i32 -// CHECK-NEXT: %315 = arith.constant 5 : i32 -// CHECK-NEXT: %316 = arith.constant 11 : i32 -// CHECK-NEXT: %317 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %318 = arith.constant 4 : i64 -// CHECK-NEXT: %319 = arith.index_cast %315 : i32 to index -// CHECK-NEXT: %320 = arith.index_cast %319 : index to i64 -// CHECK-NEXT: %321 = arith.muli %318, %320 : i64 -// CHECK-NEXT: %322 = arith.addi %321, %317 : i64 -// CHECK-NEXT: %323 = "llvm.inttoptr"(%322) : (i64) -> !llvm.ptr -// CHECK-NEXT: %324 = "llvm.ptrtoint"(%4) : (!llvm.ptr) -> i64 -// CHECK-NEXT: %325 = arith.constant 4 : i64 -// CHECK-NEXT: %326 = arith.index_cast %316 : i32 to index -// CHECK-NEXT: %327 = arith.index_cast %326 : index to i64 -// CHECK-NEXT: %328 = arith.muli %325, %327 : i64 -// CHECK-NEXT: %329 = arith.addi %328, %324 : i64 -// CHECK-NEXT: %330 = "llvm.inttoptr"(%329) : (i64) -> !llvm.ptr -// CHECK-NEXT: 
"scf.if"(%308) ({ -// CHECK-NEXT: %331 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 105], offset: 22262>> -// CHECK-NEXT: "memref.copy"(%331, %send_buff_ex5) : (memref<51x101xf32, strided<[11025, 105], offset: 22262>>, memref<51x101xf32>) -> () -// CHECK-NEXT: %332 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %333 = func.call @MPI_Isend(%send_buff_ex5_ptr, %52, %53, %314, %61, %332, %323) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 -// CHECK-NEXT: %334 = arith.constant 1140850688 : i32 -// CHECK-NEXT: %335 = func.call @MPI_Irecv(%recv_buff_ex5_ptr, %56, %57, %314, %61, %334, %330) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %201 = arith.remui %8, %40 : i32 +// CHECK-NEXT: %202 = arith.divui %201, %40 : i32 +// CHECK-NEXT: %203 = arith.remui %201, %40 : i32 +// CHECK-NEXT: %204 = arith.divui %203, %40 : i32 +// CHECK-NEXT: %205 = arith.remui %203, %40 : i32 +// CHECK-NEXT: %206 = arith.addi %204, %81 : i32 +// CHECK-NEXT: %207 = arith.cmpi sge, %206, %39 : i32 +// CHECK-NEXT: %208 = arith.andi %177, %207 : i1 +// CHECK-NEXT: %209 = arith.addi %206, %117 : i32 +// CHECK-NEXT: %210 = arith.muli %40, %202 : i32 +// CHECK-NEXT: %211 = arith.addi %209, %210 : i32 +// CHECK-NEXT: %212 = arith.constant 5 : i32 +// CHECK-NEXT: %213 = arith.constant 11 : i32 +// CHECK-NEXT: %214 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %215 = arith.index_cast %212 : i32 to index +// CHECK-NEXT: %216 = arith.index_cast %215 : index to i64 +// CHECK-NEXT: %217 = arith.muli %59, %216 : i64 +// CHECK-NEXT: %218 = arith.addi %217, %214 : i64 +// CHECK-NEXT: %219 = "llvm.inttoptr"(%218) : (i64) -> !llvm.ptr +// CHECK-NEXT: %220 = "llvm.ptrtoint"(%3) : (!llvm.ptr) -> i64 +// CHECK-NEXT: %221 = arith.index_cast %213 : i32 to index 
+// CHECK-NEXT: %222 = arith.index_cast %221 : index to i64 +// CHECK-NEXT: %223 = arith.muli %59, %222 : i64 +// CHECK-NEXT: %224 = arith.addi %223, %220 : i64 +// CHECK-NEXT: %225 = "llvm.inttoptr"(%224) : (i64) -> !llvm.ptr +// CHECK-NEXT: "scf.if"(%208) ({ +// CHECK-NEXT: %226 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %227 = memref.subview %226[2, 2, 2] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22262>> +// CHECK-NEXT: "memref.copy"(%227, %send_buff_ex5) : (memref<51x101xf32, strided<[11025, 105], offset: 22262>>, memref<51x101xf32>) -> () +// CHECK-NEXT: %228 = func.call @MPI_Isend(%send_buff_ex5_ptr, %21, %12, %211, %39, %4, %219) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 +// CHECK-NEXT: %229 = func.call @MPI_Irecv(%recv_buff_ex5_ptr, %21, %12, %211, %39, %4, %225) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { -// CHECK-NEXT: %336 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%336, %323) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () -// CHECK-NEXT: %337 = arith.constant 738197504 : i32 -// CHECK-NEXT: "llvm.store"(%337, %330) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: %230 = arith.constant 738197504 : i32 +// CHECK-NEXT: "llvm.store"(%230, %219) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () +// CHECK-NEXT: "llvm.store"(%230, %225) <{"ordering" = 0 : i64}> : (i32, !llvm.ptr) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %338 = arith.constant 1 : i64 -// CHECK-NEXT: %339 = "llvm.inttoptr"(%338) : (i64) -> !llvm.ptr -// CHECK-NEXT: %340 = func.call @MPI_Waitall(%3, %4, %339) : (i32, !llvm.ptr, !llvm.ptr) -> i32 -// CHECK-NEXT: "scf.if"(%78) ({ -// CHECK-NEXT: %341 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, 
"operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<101x101xf32, strided<[105, 1], offset: 584537>> -// CHECK-NEXT: "memref.copy"(%recv_buff_ex0, %341) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 584537>>) -> () +// CHECK-NEXT: %231 = "llvm.inttoptr"(%5) : (i64) -> !llvm.ptr +// CHECK-NEXT: %232 = func.call @MPI_Waitall(%2, %3, %231) : (i32, !llvm.ptr, !llvm.ptr) -> i32 +// CHECK-NEXT: "scf.if"(%52) ({ +// CHECK-NEXT: %233 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %234 = memref.subview %233[53, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 584537>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex0, %234) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 584537>>) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: "scf.if"(%124) ({ -// CHECK-NEXT: %342 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<101x101xf32, strided<[105, 1], offset: 11237>> -// CHECK-NEXT: "memref.copy"(%recv_buff_ex1, %342) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 11237>>) -> () +// CHECK-NEXT: "scf.if"(%85) ({ +// CHECK-NEXT: %235 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %236 = memref.subview %235[1, 2, 2] [1, 101, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<101x101xf32, strided<[105, 1], offset: 11237>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex1, %236) : (memref<101x101xf32>, memref<101x101xf32, strided<[105, 1], offset: 11237>>) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : 
(i1) -> () -// CHECK-NEXT: "scf.if"(%170) ({ -// CHECK-NEXT: %343 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 1], offset: 32867>> -// CHECK-NEXT: "memref.copy"(%recv_buff_ex2, %343) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 32867>>) -> () +// CHECK-NEXT: "scf.if"(%116) ({ +// CHECK-NEXT: %237 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %238 = memref.subview %237[2, 103, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 32867>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex2, %238) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 32867>>) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: "scf.if"(%216) ({ -// CHECK-NEXT: %344 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 1], offset: 22157>> -// CHECK-NEXT: "memref.copy"(%recv_buff_ex3, %344) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 22157>>) -> () +// CHECK-NEXT: "scf.if"(%147) ({ +// CHECK-NEXT: %239 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %240 = memref.subview %239[2, 1, 2] [51, 1, 101] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 1], offset: 22157>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex3, %240) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 1], offset: 22157>>) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { // CHECK-NEXT: scf.yield 
// CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: "scf.if"(%262) ({ -// CHECK-NEXT: %345 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 105], offset: 22363>> -// CHECK-NEXT: "memref.copy"(%recv_buff_ex4, %345) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22363>>) -> () +// CHECK-NEXT: "scf.if"(%178) ({ +// CHECK-NEXT: %241 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %242 = memref.subview %241[2, 2, 103] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22363>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex4, %242) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22363>>) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }, { // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: "scf.if"(%308) ({ -// CHECK-NEXT: %346 = "memref.subview"(%u_t0_loadview) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>>) -> memref<51x101xf32, strided<[11025, 105], offset: 22261>> -// CHECK-NEXT: "memref.copy"(%recv_buff_ex5, %346) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22261>>) -> () +// CHECK-NEXT: "scf.if"(%208) ({ +// CHECK-NEXT: %243 = builtin.unrealized_conversion_cast %u_t0 : memref<55x105x105xf32> to memref<55x105x105xf32> +// CHECK-NEXT: %244 = memref.subview %243[2, 2, 1] [51, 101, 1] [1, 1, 1] : memref<55x105x105xf32> to memref<51x101xf32, strided<[11025, 105], offset: 22261>> +// CHECK-NEXT: "memref.copy"(%recv_buff_ex5, %244) : (memref<51x101xf32>, memref<51x101xf32, strided<[11025, 105], offset: 22261>>) -> () // CHECK-NEXT: scf.yield // 
CHECK-NEXT: }, { // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (i1) -> () -// CHECK-NEXT: %u_t2_loadview = "memref.subview"(%u_t2) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %347 = arith.constant 0 : index -// CHECK-NEXT: %348 = arith.constant 0 : index -// CHECK-NEXT: %349 = arith.constant 0 : index -// CHECK-NEXT: %350 = arith.constant 1 : index -// CHECK-NEXT: %351 = arith.constant 1 : index -// CHECK-NEXT: %352 = arith.constant 1 : index -// CHECK-NEXT: %353 = arith.constant 51 : index -// CHECK-NEXT: %354 = arith.constant 101 : index -// CHECK-NEXT: %355 = arith.constant 101 : index -// CHECK-NEXT: %356 = arith.constant 0 : index -// CHECK-NEXT: %357 = arith.constant 64 : index -// CHECK-NEXT: %358 = arith.constant 64 : index -// CHECK-NEXT: %359 = arith.muli %350, %357 : index -// CHECK-NEXT: %360 = arith.muli %351, %358 : index -// CHECK-NEXT: "scf.parallel"(%347, %348, %353, %354, %359, %360) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^0(%361 : index, %362 : index): -// CHECK-NEXT: %363 = "affine.min"(%357, %353, %361) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: %364 = "affine.min"(%358, %354, %362) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: "scf.parallel"(%356, %356, %349, %363, %364, %355, %350, %351, %352) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^1(%365 : index, %366 : index, %367 : index): -// CHECK-NEXT: %368 = arith.addi %361, %365 : index -// CHECK-NEXT: %369 = arith.addi %362, %366 : index +// CHECK-NEXT: %245 = memref.subview %u_t1[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %u_t0_blk = memref.subview %u_t0[2, 2, 2] [55, 
105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %u_t2_blk = memref.subview %u_t2[2, 2, 2] [55, 105, 105] [1, 1, 1] : memref<55x105x105xf32> to memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %246 = arith.constant 0 : index +// CHECK-NEXT: %247 = arith.constant 51 : index +// CHECK-NEXT: %248 = arith.constant 101 : index +// CHECK-NEXT: %249 = arith.constant 64 : index +// CHECK-NEXT: %250 = arith.muli %time_m, %249 : index +// CHECK-NEXT: "scf.parallel"(%246, %246, %247, %248, %250, %250) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^0(%251 : index, %252 : index): +// CHECK-NEXT: %253 = "affine.min"(%249, %247, %251) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: %254 = "affine.min"(%249, %248, %252) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: "scf.parallel"(%246, %246, %246, %253, %254, %248, %time_m, %time_m, %time_m) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^1(%255 : index, %256 : index, %257 : index): +// CHECK-NEXT: %258 = arith.addi %251, %255 : index +// CHECK-NEXT: %259 = arith.addi %252, %256 : index // CHECK-NEXT: %dt = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %370 = arith.constant 2 : i64 -// CHECK-NEXT: %371 = "math.fpowi"(%dt, %370) : (f32, i64) -> f32 -// CHECK-NEXT: %372 = arith.constant -1 : i64 -// CHECK-NEXT: %dt_1 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: %373 = arith.constant -2 : i64 -// CHECK-NEXT: %374 = "math.fpowi"(%dt_1, %373) : (f32, i64) -> f32 -// CHECK-NEXT: %375 = memref.load %u_t2_loadview[%368, %369, %367] : memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %376 = arith.mulf %374, %375 : f32 -// CHECK-NEXT: %377 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %dt_2 = arith.constant 1.000000e-04 : f32 -// CHECK-NEXT: 
%378 = arith.constant -2 : i64 -// CHECK-NEXT: %379 = "math.fpowi"(%dt_2, %378) : (f32, i64) -> f32 -// CHECK-NEXT: %380 = memref.load %u_t0_loadview[%368, %369, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %381 = arith.mulf %377, %379 : f32 -// CHECK-NEXT: %382 = arith.mulf %381, %380 : f32 -// CHECK-NEXT: %383 = arith.addf %376, %382 : f32 -// CHECK-NEXT: %384 = arith.sitofp %372 : i64 to f32 -// CHECK-NEXT: %385 = arith.mulf %384, %383 : f32 +// CHECK-NEXT: %260 = arith.constant 2 : i64 +// CHECK-NEXT: %261 = "math.fpowi"(%dt, %260) : (f32, i64) -> f32 +// CHECK-NEXT: %262 = arith.constant -1 : i64 +// CHECK-NEXT: %263 = arith.constant -2 : i64 +// CHECK-NEXT: %264 = "math.fpowi"(%dt, %263) : (f32, i64) -> f32 +// CHECK-NEXT: %265 = memref.load %u_t2_blk[%258, %259, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %266 = arith.mulf %264, %265 : f32 +// CHECK-NEXT: %267 = arith.constant -2.000000e+00 : f32 +// CHECK-NEXT: %268 = memref.load %u_t0_blk[%258, %259, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %269 = arith.mulf %267, %264 : f32 +// CHECK-NEXT: %270 = arith.mulf %269, %268 : f32 +// CHECK-NEXT: %271 = arith.addf %266, %270 : f32 +// CHECK-NEXT: %272 = arith.sitofp %262 : i64 to f32 +// CHECK-NEXT: %273 = arith.mulf %272, %271 : f32 // CHECK-NEXT: %h_x = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %386 = arith.constant -2 : i64 -// CHECK-NEXT: %387 = "math.fpowi"(%h_x, %386) : (f32, i64) -> f32 -// CHECK-NEXT: %388 = arith.constant -1 : index -// CHECK-NEXT: %389 = arith.addi %368, %388 : index -// CHECK-NEXT: %390 = memref.load %u_t0_loadview[%389, %369, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %391 = arith.mulf %387, %390 : f32 -// CHECK-NEXT: %h_x_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %392 = arith.constant -2 : i64 -// CHECK-NEXT: %393 = "math.fpowi"(%h_x_1, 
%392) : (f32, i64) -> f32 -// CHECK-NEXT: %394 = arith.constant 1 : index -// CHECK-NEXT: %395 = arith.addi %368, %394 : index -// CHECK-NEXT: %396 = memref.load %u_t0_loadview[%395, %369, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %397 = arith.mulf %393, %396 : f32 -// CHECK-NEXT: %398 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %399 = arith.constant -2 : i64 -// CHECK-NEXT: %400 = "math.fpowi"(%h_x_2, %399) : (f32, i64) -> f32 -// CHECK-NEXT: %401 = memref.load %u_t0_loadview[%368, %369, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %402 = arith.mulf %398, %400 : f32 -// CHECK-NEXT: %403 = arith.mulf %402, %401 : f32 -// CHECK-NEXT: %404 = arith.addf %391, %397 : f32 -// CHECK-NEXT: %405 = arith.addf %404, %403 : f32 -// CHECK-NEXT: %h_y = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %406 = arith.constant -2 : i64 -// CHECK-NEXT: %407 = "math.fpowi"(%h_y, %406) : (f32, i64) -> f32 -// CHECK-NEXT: %408 = arith.constant -1 : index -// CHECK-NEXT: %409 = arith.addi %369, %408 : index -// CHECK-NEXT: %410 = memref.load %u_t0_loadview[%368, %409, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %411 = arith.mulf %407, %410 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %412 = arith.constant -2 : i64 -// CHECK-NEXT: %413 = "math.fpowi"(%h_y_1, %412) : (f32, i64) -> f32 -// CHECK-NEXT: %414 = arith.constant 1 : index -// CHECK-NEXT: %415 = arith.addi %369, %414 : index -// CHECK-NEXT: %416 = memref.load %u_t0_loadview[%368, %415, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %417 = arith.mulf %413, %416 : f32 -// CHECK-NEXT: %418 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %419 = arith.constant -2 : i64 -// CHECK-NEXT: %420 = 
"math.fpowi"(%h_y_2, %419) : (f32, i64) -> f32 -// CHECK-NEXT: %421 = memref.load %u_t0_loadview[%368, %369, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %422 = arith.mulf %418, %420 : f32 -// CHECK-NEXT: %423 = arith.mulf %422, %421 : f32 -// CHECK-NEXT: %424 = arith.addf %411, %417 : f32 -// CHECK-NEXT: %425 = arith.addf %424, %423 : f32 -// CHECK-NEXT: %h_z = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %426 = arith.constant -2 : i64 -// CHECK-NEXT: %427 = "math.fpowi"(%h_z, %426) : (f32, i64) -> f32 -// CHECK-NEXT: %428 = arith.constant -1 : index -// CHECK-NEXT: %429 = arith.addi %367, %428 : index -// CHECK-NEXT: %430 = memref.load %u_t0_loadview[%368, %369, %429] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %431 = arith.mulf %427, %430 : f32 -// CHECK-NEXT: %h_z_1 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %432 = arith.constant -2 : i64 -// CHECK-NEXT: %433 = "math.fpowi"(%h_z_1, %432) : (f32, i64) -> f32 -// CHECK-NEXT: %434 = arith.constant 1 : index -// CHECK-NEXT: %435 = arith.addi %367, %434 : index -// CHECK-NEXT: %436 = memref.load %u_t0_loadview[%368, %369, %435] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %437 = arith.mulf %433, %436 : f32 -// CHECK-NEXT: %438 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_z_2 = arith.constant 1.000000e-02 : f32 -// CHECK-NEXT: %439 = arith.constant -2 : i64 -// CHECK-NEXT: %440 = "math.fpowi"(%h_z_2, %439) : (f32, i64) -> f32 -// CHECK-NEXT: %441 = memref.load %u_t0_loadview[%368, %369, %367] : memref<53x103x103xf32, strided<[11025, 105, 1], offset: 22262>> -// CHECK-NEXT: %442 = arith.mulf %438, %440 : f32 -// CHECK-NEXT: %443 = arith.mulf %442, %441 : f32 -// CHECK-NEXT: %444 = arith.addf %431, %437 : f32 -// CHECK-NEXT: %445 = arith.addf %444, %443 : f32 -// CHECK-NEXT: %446 = arith.addf %385, %405 : f32 -// CHECK-NEXT: %447 = arith.addf %446, %425 : f32 -// CHECK-NEXT: 
%448 = arith.addf %447, %445 : f32 -// CHECK-NEXT: %449 = arith.mulf %371, %448 : f32 -// CHECK-NEXT: memref.store %449, %u_t1_storeview[%368, %369, %367] : memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %274 = "math.fpowi"(%h_x, %263) : (f32, i64) -> f32 +// CHECK-NEXT: %275 = arith.constant -1 : index +// CHECK-NEXT: %276 = arith.addi %258, %275 : index +// CHECK-NEXT: %277 = memref.load %u_t0_blk[%276, %259, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %278 = arith.mulf %274, %277 : f32 +// CHECK-NEXT: %279 = arith.addi %258, %time_m : index +// CHECK-NEXT: %280 = memref.load %u_t0_blk[%279, %259, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %281 = arith.mulf %274, %280 : f32 +// CHECK-NEXT: %282 = arith.mulf %267, %274 : f32 +// CHECK-NEXT: %283 = arith.mulf %282, %268 : f32 +// CHECK-NEXT: %284 = arith.addf %278, %281 : f32 +// CHECK-NEXT: %285 = arith.addf %284, %283 : f32 +// CHECK-NEXT: %286 = arith.addi %259, %275 : index +// CHECK-NEXT: %287 = memref.load %u_t0_blk[%258, %286, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %288 = arith.mulf %274, %287 : f32 +// CHECK-NEXT: %289 = arith.addi %259, %time_m : index +// CHECK-NEXT: %290 = memref.load %u_t0_blk[%258, %289, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %291 = arith.mulf %274, %290 : f32 +// CHECK-NEXT: %292 = arith.addf %288, %291 : f32 +// CHECK-NEXT: %293 = arith.addf %292, %283 : f32 +// CHECK-NEXT: %294 = arith.addi %257, %275 : index +// CHECK-NEXT: %295 = memref.load %u_t0_blk[%258, %259, %294] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> +// CHECK-NEXT: %296 = arith.mulf %274, %295 : f32 +// CHECK-NEXT: %297 = arith.addi %257, %time_m : index +// CHECK-NEXT: %298 = memref.load %u_t0_blk[%258, %259, %297] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> 
+// CHECK-NEXT: %299 = arith.mulf %274, %298 : f32 +// CHECK-NEXT: %300 = arith.addf %296, %299 : f32 +// CHECK-NEXT: %301 = arith.addf %300, %283 : f32 +// CHECK-NEXT: %302 = arith.addf %273, %285 : f32 +// CHECK-NEXT: %303 = arith.addf %302, %293 : f32 +// CHECK-NEXT: %304 = arith.addf %303, %301 : f32 +// CHECK-NEXT: %305 = arith.mulf %261, %304 : f32 +// CHECK-NEXT: memref.store %305, %245[%258, %259, %257] : memref<55x105x105xf32, strided<[11025, 105, 1], offset: 22262>> // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index, index, index, index) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index) -> () -// CHECK-NEXT: %u_t1_temp = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<55x105x105xf32>) -> memref<51x101x101xf32, strided<[11025, 105, 1], offset: 22262>> // CHECK-NEXT: scf.yield %u_t1, %u_t2, %u_t0 : memref<55x105x105xf32>, memref<55x105x105xf32>, memref<55x105x105xf32> // CHECK-NEXT: } -// CHECK-NEXT: %450 = func.call @timer_end(%0) : (f64) -> f64 -// CHECK-NEXT: "llvm.store"(%450, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () +// CHECK-NEXT: %306 = func.call @timer_end(%0) : (f64) -> f64 +// CHECK-NEXT: "llvm.store"(%306, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () // CHECK-NEXT: func.return // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 @@ -698,4 +541,4 @@ builtin.module { // CHECK-NEXT: func.func private @MPI_Isend(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: func.func private @MPI_Irecv(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK-NEXT: func.func private @MPI_Waitall(i32, !llvm.ptr, !llvm.ptr) -> i32 -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: } diff --git a/tests/filecheck/xdsl_pipeline.mlir b/tests/filecheck/xdsl_pipeline.mlir index f2981a3a17..207eb73d56 100644 --- 
a/tests/filecheck/xdsl_pipeline.mlir +++ b/tests/filecheck/xdsl_pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: xdsl-opt -p canonicalize,cse,shape-inference,stencil-bufferize,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,0},printf-to-llvm,canonicalize %s | filecheck %s +// RUN: xdsl-opt -p "canonicalize,cse,shape-inference,stencil-bufferize,convert-stencil-to-ll-mlir,scf-parallel-loop-tiling{parallel-loop-tile-sizes=64,0},printf-to-llvm,canonicalize,cse" %s | filecheck %s builtin.module { func.func @Kernel(%f2_vec0 : !stencil.field<[-2,5]x[-2,5]xf32>, %f2_vec1 : !stencil.field<[-2,5]x[-2,5]xf32>, %timers : !llvm.ptr) { @@ -81,98 +81,63 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 // CHECK-NEXT: %time_m = arith.constant 0 : index // CHECK-NEXT: %time_M = arith.constant 1 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index -// CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3, %4 = scf.for %time = %time_m to %2 step %step iter_args(%f2_t0 = %f2_vec0, %f2_t1 = %f2_vec1) -> (memref<7x7xf32>, memref<7x7xf32>) { -// CHECK-NEXT: %f2_t1_storeview = "memref.subview"(%f2_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<7x7xf32>) -> memref<3x3xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %f2_t0_loadview = "memref.subview"(%f2_t0) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<7x7xf32>) -> memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %5 = arith.constant 0 : index -// CHECK-NEXT: %6 = arith.constant 0 : index -// CHECK-NEXT: %7 = arith.constant 1 : index -// CHECK-NEXT: %8 = arith.constant 1 : index -// CHECK-NEXT: %9 = arith.constant 3 : index -// CHECK-NEXT: %10 = arith.constant 3 : index -// CHECK-NEXT: %11 = arith.constant 0 : index -// CHECK-NEXT: %12 = arith.constant 64 : index -// 
CHECK-NEXT: %13 = arith.muli %7, %12 : index -// CHECK-NEXT: "scf.parallel"(%5, %9, %13) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^0(%14 : index): -// CHECK-NEXT: %15 = "affine.min"(%12, %9, %14) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: "scf.parallel"(%11, %6, %15, %10, %7, %8) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^1(%16 : index, %17 : index): -// CHECK-NEXT: %18 = arith.addi %14, %16 : index -// CHECK-NEXT: %19 = arith.constant 5.000000e-01 : f32 +// CHECK-NEXT: %1 = arith.addi %time_M, %time_M : index +// CHECK-NEXT: %2, %3 = scf.for %time = %time_m to %1 step %time_M iter_args(%f2_t0 = %f2_vec0, %f2_t1 = %f2_vec1) -> (memref<7x7xf32>, memref<7x7xf32>) { +// CHECK-NEXT: %4 = memref.subview %f2_t1[2, 2] [7, 7] [1, 1] : memref<7x7xf32> to memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %f2_t0_blk = memref.subview %f2_t0[2, 2] [7, 7] [1, 1] : memref<7x7xf32> to memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %5 = arith.constant 3 : index +// CHECK-NEXT: %6 = arith.constant 64 : index +// CHECK-NEXT: %7 = arith.muli %time_M, %6 : index +// CHECK-NEXT: "scf.parallel"(%time_m, %5, %7) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^0(%8 : index): +// CHECK-NEXT: %9 = "affine.min"(%6, %5, %8) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: "scf.parallel"(%time_m, %time_m, %9, %5, %time_M, %time_M) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^1(%10 : index, %11 : index): +// CHECK-NEXT: %12 = arith.addi %8, %10 : index // CHECK-NEXT: %h_x = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %20 = arith.constant -2 : i64 -// CHECK-NEXT: %21 = "math.fpowi"(%h_x, %20) : (f32, i64) -> f32 -// CHECK-NEXT: %22 = arith.constant -1 : index -// CHECK-NEXT: %23 = arith.addi %18, %22 : index -// CHECK-NEXT: %24 = memref.load %f2_t0_loadview[%23, %17] : memref<5x5xf32, strided<[7, 
1], offset: 16>> -// CHECK-NEXT: %25 = arith.mulf %21, %24 : f32 -// CHECK-NEXT: %h_x_1 = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %26 = arith.constant -2 : i64 -// CHECK-NEXT: %27 = "math.fpowi"(%h_x_1, %26) : (f32, i64) -> f32 -// CHECK-NEXT: %28 = arith.constant 1 : index -// CHECK-NEXT: %29 = arith.addi %18, %28 : index -// CHECK-NEXT: %30 = memref.load %f2_t0_loadview[%29, %17] : memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %31 = arith.mulf %27, %30 : f32 -// CHECK-NEXT: %32 = arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %33 = arith.constant -2 : i64 -// CHECK-NEXT: %34 = "math.fpowi"(%h_x_2, %33) : (f32, i64) -> f32 -// CHECK-NEXT: %35 = memref.load %f2_t0_loadview[%18, %17] : memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %36 = arith.mulf %32, %34 : f32 -// CHECK-NEXT: %37 = arith.mulf %36, %35 : f32 -// CHECK-NEXT: %38 = arith.addf %25, %31 : f32 -// CHECK-NEXT: %39 = arith.addf %38, %37 : f32 -// CHECK-NEXT: %40 = arith.mulf %19, %39 : f32 -// CHECK-NEXT: %41 = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %h_y = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %42 = arith.constant -2 : i64 -// CHECK-NEXT: %43 = "math.fpowi"(%h_y, %42) : (f32, i64) -> f32 -// CHECK-NEXT: %44 = arith.constant -1 : index -// CHECK-NEXT: %45 = arith.addi %17, %44 : index -// CHECK-NEXT: %46 = memref.load %f2_t0_loadview[%18, %45] : memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %47 = arith.mulf %43, %46 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %48 = arith.constant -2 : i64 -// CHECK-NEXT: %49 = "math.fpowi"(%h_y_1, %48) : (f32, i64) -> f32 -// CHECK-NEXT: %50 = arith.constant 1 : index -// CHECK-NEXT: %51 = arith.addi %17, %50 : index -// CHECK-NEXT: %52 = memref.load %f2_t0_loadview[%18, %51] : memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %53 = arith.mulf %49, %52 : f32 -// CHECK-NEXT: %54 = 
arith.constant -2.000000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 5.000000e-01 : f32 -// CHECK-NEXT: %55 = arith.constant -2 : i64 -// CHECK-NEXT: %56 = "math.fpowi"(%h_y_2, %55) : (f32, i64) -> f32 -// CHECK-NEXT: %57 = memref.load %f2_t0_loadview[%18, %17] : memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %58 = arith.mulf %54, %56 : f32 -// CHECK-NEXT: %59 = arith.mulf %58, %57 : f32 -// CHECK-NEXT: %60 = arith.addf %47, %53 : f32 -// CHECK-NEXT: %61 = arith.addf %60, %59 : f32 -// CHECK-NEXT: %62 = arith.mulf %41, %61 : f32 +// CHECK-NEXT: %13 = arith.constant -2 : i64 +// CHECK-NEXT: %14 = "math.fpowi"(%h_x, %13) : (f32, i64) -> f32 +// CHECK-NEXT: %15 = arith.constant -1 : index +// CHECK-NEXT: %16 = arith.addi %12, %15 : index +// CHECK-NEXT: %17 = memref.load %f2_t0_blk[%16, %11] : memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %18 = arith.mulf %14, %17 : f32 +// CHECK-NEXT: %19 = arith.addi %12, %time_M : index +// CHECK-NEXT: %20 = memref.load %f2_t0_blk[%19, %11] : memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %21 = arith.mulf %14, %20 : f32 +// CHECK-NEXT: %22 = arith.constant -2.000000e+00 : f32 +// CHECK-NEXT: %23 = memref.load %f2_t0_blk[%12, %11] : memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %24 = arith.mulf %22, %14 : f32 +// CHECK-NEXT: %25 = arith.mulf %24, %23 : f32 +// CHECK-NEXT: %26 = arith.addf %18, %21 : f32 +// CHECK-NEXT: %27 = arith.addf %26, %25 : f32 +// CHECK-NEXT: %28 = arith.mulf %h_x, %27 : f32 +// CHECK-NEXT: %29 = arith.addi %11, %15 : index +// CHECK-NEXT: %30 = memref.load %f2_t0_blk[%12, %29] : memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %31 = arith.mulf %14, %30 : f32 +// CHECK-NEXT: %32 = arith.addi %11, %time_M : index +// CHECK-NEXT: %33 = memref.load %f2_t0_blk[%12, %32] : memref<7x7xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %34 = arith.mulf %14, %33 : f32 +// CHECK-NEXT: %35 = arith.addf %31, %34 : f32 +// CHECK-NEXT: %36 = 
arith.addf %35, %25 : f32 +// CHECK-NEXT: %37 = arith.mulf %h_x, %36 : f32 // CHECK-NEXT: %dt = arith.constant 1.000000e-01 : f32 -// CHECK-NEXT: %63 = arith.constant -1 : i64 -// CHECK-NEXT: %64 = "math.fpowi"(%dt, %63) : (f32, i64) -> f32 -// CHECK-NEXT: %65 = memref.load %f2_t0_loadview[%18, %17] : memref<5x5xf32, strided<[7, 1], offset: 16>> -// CHECK-NEXT: %66 = arith.mulf %64, %65 : f32 -// CHECK-NEXT: %67 = arith.addf %40, %62 : f32 -// CHECK-NEXT: %68 = arith.addf %67, %66 : f32 -// CHECK-NEXT: %dt_1 = arith.constant 1.000000e-01 : f32 -// CHECK-NEXT: %69 = arith.mulf %68, %dt_1 : f32 -// CHECK-NEXT: memref.store %69, %f2_t1_storeview[%18, %17] : memref<3x3xf32, strided<[7, 1], offset: 16>> +// CHECK-NEXT: %38 = arith.constant -1 : i64 +// CHECK-NEXT: %39 = "math.fpowi"(%dt, %38) : (f32, i64) -> f32 +// CHECK-NEXT: %40 = arith.mulf %39, %23 : f32 +// CHECK-NEXT: %41 = arith.addf %28, %37 : f32 +// CHECK-NEXT: %42 = arith.addf %41, %40 : f32 +// CHECK-NEXT: %43 = arith.mulf %42, %dt : f32 +// CHECK-NEXT: memref.store %43, %4[%12, %11] : memref<7x7xf32, strided<[7, 1], offset: 16>> // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index) -> () // CHECK-NEXT: scf.yield %f2_t1, %f2_t0 : memref<7x7xf32>, memref<7x7xf32> // CHECK-NEXT: } -// CHECK-NEXT: %70 = func.call @timer_end(%0) : (f64) -> f64 -// CHECK-NEXT: "llvm.store"(%70, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () +// CHECK-NEXT: %44 = func.call @timer_end(%0) : (f64) -> f64 +// CHECK-NEXT: "llvm.store"(%44, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () // CHECK-NEXT: func.return // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 // CHECK-NEXT: func.func private @timer_end(f64) -> f64 // CHECK-NEXT: } -// CHECK-NEXT: diff --git a/tests/filecheck/xdsl_pipeline_openmp.mlir b/tests/filecheck/xdsl_pipeline_openmp.mlir index 127763e598..76394b2249 100644 
--- a/tests/filecheck/xdsl_pipeline_openmp.mlir +++ b/tests/filecheck/xdsl_pipeline_openmp.mlir @@ -207,196 +207,108 @@ builtin.module { // CHECK-NEXT: %0 = func.call @timer_start() : () -> f64 // CHECK-NEXT: %time_m = arith.constant 0 : index // CHECK-NEXT: %time_M = arith.constant 250 : index -// CHECK-NEXT: %1 = arith.constant 1 : index -// CHECK-NEXT: %2 = arith.addi %time_M, %1 : index // CHECK-NEXT: %step = arith.constant 1 : index -// CHECK-NEXT: %3, %4 = scf.for %time = %time_m to %2 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1) -> (memref<158x158x158xf32>, memref<158x158x158xf32>) { -// CHECK-NEXT: %u_t1_storeview = "memref.subview"(%u_t1) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<158x158x158xf32>) -> memref<150x150x150xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %u_t0_loadview = "memref.subview"(%u_t0) <{"static_offsets" = array, "static_sizes" = array, "static_strides" = array, "operandSegmentSizes" = array}> : (memref<158x158x158xf32>) -> memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %5 = arith.constant 0 : index -// CHECK-NEXT: %6 = arith.constant 0 : index -// CHECK-NEXT: %7 = arith.constant 0 : index -// CHECK-NEXT: %8 = arith.constant 1 : index -// CHECK-NEXT: %9 = arith.constant 1 : index -// CHECK-NEXT: %10 = arith.constant 1 : index -// CHECK-NEXT: %11 = arith.constant 150 : index -// CHECK-NEXT: %12 = arith.constant 150 : index -// CHECK-NEXT: %13 = arith.constant 150 : index -// CHECK-NEXT: %14 = arith.constant 0 : index -// CHECK-NEXT: %15 = arith.constant 64 : index -// CHECK-NEXT: %16 = arith.constant 64 : index -// CHECK-NEXT: %17 = arith.muli %8, %15 : index -// CHECK-NEXT: %18 = arith.muli %9, %16 : index -// CHECK-NEXT: "scf.parallel"(%5, %6, %11, %12, %17, %18) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^0(%19 : index, %20 : index): -// CHECK-NEXT: %21 = "affine.min"(%15, %11, %19) 
<{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: %22 = "affine.min"(%16, %12, %20) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index -// CHECK-NEXT: "scf.parallel"(%14, %14, %7, %21, %22, %13, %8, %9, %10) <{"operandSegmentSizes" = array}> ({ -// CHECK-NEXT: ^1(%23 : index, %24 : index, %25 : index): -// CHECK-NEXT: %26 = arith.addi %19, %23 : index -// CHECK-NEXT: %27 = arith.addi %20, %24 : index +// CHECK-NEXT: %1 = arith.addi %time_M, %step : index +// CHECK-NEXT: %2, %3 = scf.for %time = %time_m to %1 step %step iter_args(%u_t0 = %u_vec0, %u_t1 = %u_vec1) -> (memref<158x158x158xf32>, memref<158x158x158xf32>) { +// CHECK-NEXT: %u_t1_storeview = memref.subview %u_t1[4, 4, 4] [150, 150, 150] [1, 1, 1] : memref<158x158x158xf32> to memref<150x150x150xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %u_t0_loadview = memref.subview %u_t0[4, 4, 4] [154, 154, 154] [1, 1, 1] : memref<158x158x158xf32> to memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %4 = arith.constant 150 : index +// CHECK-NEXT: %5 = arith.constant 64 : index +// CHECK-NEXT: %6 = arith.muli %step, %5 : index +// CHECK-NEXT: "scf.parallel"(%time_m, %time_m, %4, %4, %6, %6) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^0(%7 : index, %8 : index): +// CHECK-NEXT: %9 = "affine.min"(%5, %4, %7) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: %10 = "affine.min"(%5, %4, %8) <{"map" = affine_map<(d0, d1, d2) -> (d0, (d1 + (d2 * -1)))>}> : (index, index, index) -> index +// CHECK-NEXT: "scf.parallel"(%time_m, %time_m, %time_m, %9, %10, %4, %step, %step, %step) <{"operandSegmentSizes" = array}> ({ +// CHECK-NEXT: ^1(%11 : index, %12 : index, %13 : index): +// CHECK-NEXT: %14 = arith.addi %7, %11 : index +// CHECK-NEXT: %15 = arith.addi %8, %12 : index // CHECK-NEXT: %dt = arith.constant 
6.717825e-07 : f32 -// CHECK-NEXT: %28 = arith.constant -1 : i64 -// CHECK-NEXT: %29 = "math.fpowi"(%dt, %28) : (f32, i64) -> f32 -// CHECK-NEXT: %30 = memref.load %u_t0_loadview[%26, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %31 = arith.mulf %29, %30 : f32 -// CHECK-NEXT: %32 = arith.constant 1.333333e+00 : f32 +// CHECK-NEXT: %16 = arith.constant -1 : i64 +// CHECK-NEXT: %17 = "math.fpowi"(%dt, %16) : (f32, i64) -> f32 +// CHECK-NEXT: %18 = memref.load %u_t0_loadview[%14, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %19 = arith.mulf %17, %18 : f32 +// CHECK-NEXT: %20 = arith.constant 1.333333e+00 : f32 // CHECK-NEXT: %h_x = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %33 = arith.constant -2 : i64 -// CHECK-NEXT: %34 = "math.fpowi"(%h_x, %33) : (f32, i64) -> f32 -// CHECK-NEXT: %35 = arith.constant -1 : index -// CHECK-NEXT: %36 = arith.addi %26, %35 : index -// CHECK-NEXT: %37 = memref.load %u_t0_loadview[%36, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %38 = arith.mulf %32, %34 : f32 -// CHECK-NEXT: %39 = arith.mulf %38, %37 : f32 -// CHECK-NEXT: %40 = arith.constant 1.333333e+00 : f32 -// CHECK-NEXT: %h_x_1 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %41 = arith.constant -2 : i64 -// CHECK-NEXT: %42 = "math.fpowi"(%h_x_1, %41) : (f32, i64) -> f32 -// CHECK-NEXT: %43 = arith.constant 1 : index -// CHECK-NEXT: %44 = arith.addi %26, %43 : index -// CHECK-NEXT: %45 = memref.load %u_t0_loadview[%44, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %46 = arith.mulf %40, %42 : f32 -// CHECK-NEXT: %47 = arith.mulf %46, %45 : f32 -// CHECK-NEXT: %48 = arith.constant -2.500000e+00 : f32 -// CHECK-NEXT: %h_x_2 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %49 = arith.constant -2 : i64 -// CHECK-NEXT: %50 = "math.fpowi"(%h_x_2, %49) : (f32, i64) -> f32 -// 
CHECK-NEXT: %51 = memref.load %u_t0_loadview[%26, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %52 = arith.mulf %48, %50 : f32 -// CHECK-NEXT: %53 = arith.mulf %52, %51 : f32 -// CHECK-NEXT: %54 = arith.constant -8.333333e-02 : f32 -// CHECK-NEXT: %h_x_3 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %55 = arith.constant -2 : i64 -// CHECK-NEXT: %56 = "math.fpowi"(%h_x_3, %55) : (f32, i64) -> f32 -// CHECK-NEXT: %57 = arith.constant -2 : index -// CHECK-NEXT: %58 = arith.addi %26, %57 : index -// CHECK-NEXT: %59 = memref.load %u_t0_loadview[%58, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %60 = arith.mulf %54, %56 : f32 -// CHECK-NEXT: %61 = arith.mulf %60, %59 : f32 -// CHECK-NEXT: %62 = arith.constant -8.333333e-02 : f32 -// CHECK-NEXT: %h_x_4 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %63 = arith.constant -2 : i64 -// CHECK-NEXT: %64 = "math.fpowi"(%h_x_4, %63) : (f32, i64) -> f32 -// CHECK-NEXT: %65 = arith.constant 2 : index -// CHECK-NEXT: %66 = arith.addi %26, %65 : index -// CHECK-NEXT: %67 = memref.load %u_t0_loadview[%66, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %68 = arith.mulf %62, %64 : f32 -// CHECK-NEXT: %69 = arith.mulf %68, %67 : f32 -// CHECK-NEXT: %70 = arith.addf %39, %47 : f32 -// CHECK-NEXT: %71 = arith.addf %70, %53 : f32 -// CHECK-NEXT: %72 = arith.addf %71, %61 : f32 -// CHECK-NEXT: %73 = arith.addf %72, %69 : f32 -// CHECK-NEXT: %74 = arith.constant 1.333333e+00 : f32 -// CHECK-NEXT: %h_y = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %75 = arith.constant -2 : i64 -// CHECK-NEXT: %76 = "math.fpowi"(%h_y, %75) : (f32, i64) -> f32 -// CHECK-NEXT: %77 = arith.constant -1 : index -// CHECK-NEXT: %78 = arith.addi %27, %77 : index -// CHECK-NEXT: %79 = memref.load %u_t0_loadview[%26, %78, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: 
%80 = arith.mulf %74, %76 : f32 -// CHECK-NEXT: %81 = arith.mulf %80, %79 : f32 -// CHECK-NEXT: %82 = arith.constant 1.333333e+00 : f32 -// CHECK-NEXT: %h_y_1 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %83 = arith.constant -2 : i64 -// CHECK-NEXT: %84 = "math.fpowi"(%h_y_1, %83) : (f32, i64) -> f32 -// CHECK-NEXT: %85 = arith.constant 1 : index -// CHECK-NEXT: %86 = arith.addi %27, %85 : index -// CHECK-NEXT: %87 = memref.load %u_t0_loadview[%26, %86, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %88 = arith.mulf %82, %84 : f32 -// CHECK-NEXT: %89 = arith.mulf %88, %87 : f32 -// CHECK-NEXT: %90 = arith.constant -2.500000e+00 : f32 -// CHECK-NEXT: %h_y_2 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %91 = arith.constant -2 : i64 -// CHECK-NEXT: %92 = "math.fpowi"(%h_y_2, %91) : (f32, i64) -> f32 -// CHECK-NEXT: %93 = memref.load %u_t0_loadview[%26, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %94 = arith.mulf %90, %92 : f32 -// CHECK-NEXT: %95 = arith.mulf %94, %93 : f32 -// CHECK-NEXT: %96 = arith.constant -8.333333e-02 : f32 -// CHECK-NEXT: %h_y_3 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %97 = arith.constant -2 : i64 -// CHECK-NEXT: %98 = "math.fpowi"(%h_y_3, %97) : (f32, i64) -> f32 -// CHECK-NEXT: %99 = arith.constant -2 : index -// CHECK-NEXT: %100 = arith.addi %27, %99 : index -// CHECK-NEXT: %101 = memref.load %u_t0_loadview[%26, %100, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %102 = arith.mulf %96, %98 : f32 -// CHECK-NEXT: %103 = arith.mulf %102, %101 : f32 -// CHECK-NEXT: %104 = arith.constant -8.333333e-02 : f32 -// CHECK-NEXT: %h_y_4 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %105 = arith.constant -2 : i64 -// CHECK-NEXT: %106 = "math.fpowi"(%h_y_4, %105) : (f32, i64) -> f32 -// CHECK-NEXT: %107 = arith.constant 2 : index -// CHECK-NEXT: %108 = arith.addi %27, %107 : index -// 
CHECK-NEXT: %109 = memref.load %u_t0_loadview[%26, %108, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %110 = arith.mulf %104, %106 : f32 -// CHECK-NEXT: %111 = arith.mulf %110, %109 : f32 -// CHECK-NEXT: %112 = arith.addf %81, %89 : f32 -// CHECK-NEXT: %113 = arith.addf %112, %95 : f32 -// CHECK-NEXT: %114 = arith.addf %113, %103 : f32 -// CHECK-NEXT: %115 = arith.addf %114, %111 : f32 -// CHECK-NEXT: %116 = arith.constant 1.333333e+00 : f32 -// CHECK-NEXT: %h_z = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %117 = arith.constant -2 : i64 -// CHECK-NEXT: %118 = "math.fpowi"(%h_z, %117) : (f32, i64) -> f32 -// CHECK-NEXT: %119 = arith.constant -1 : index -// CHECK-NEXT: %120 = arith.addi %25, %119 : index -// CHECK-NEXT: %121 = memref.load %u_t0_loadview[%26, %27, %120] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %122 = arith.mulf %116, %118 : f32 -// CHECK-NEXT: %123 = arith.mulf %122, %121 : f32 -// CHECK-NEXT: %124 = arith.constant 1.333333e+00 : f32 -// CHECK-NEXT: %h_z_1 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %125 = arith.constant -2 : i64 -// CHECK-NEXT: %126 = "math.fpowi"(%h_z_1, %125) : (f32, i64) -> f32 -// CHECK-NEXT: %127 = arith.constant 1 : index -// CHECK-NEXT: %128 = arith.addi %25, %127 : index -// CHECK-NEXT: %129 = memref.load %u_t0_loadview[%26, %27, %128] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %130 = arith.mulf %124, %126 : f32 -// CHECK-NEXT: %131 = arith.mulf %130, %129 : f32 -// CHECK-NEXT: %132 = arith.constant -2.500000e+00 : f32 -// CHECK-NEXT: %h_z_2 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %133 = arith.constant -2 : i64 -// CHECK-NEXT: %134 = "math.fpowi"(%h_z_2, %133) : (f32, i64) -> f32 -// CHECK-NEXT: %135 = memref.load %u_t0_loadview[%26, %27, %25] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %136 = arith.mulf %132, %134 : f32 -// 
CHECK-NEXT: %137 = arith.mulf %136, %135 : f32 -// CHECK-NEXT: %138 = arith.constant -8.333333e-02 : f32 -// CHECK-NEXT: %h_z_3 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %139 = arith.constant -2 : i64 -// CHECK-NEXT: %140 = "math.fpowi"(%h_z_3, %139) : (f32, i64) -> f32 -// CHECK-NEXT: %141 = arith.constant -2 : index -// CHECK-NEXT: %142 = arith.addi %25, %141 : index -// CHECK-NEXT: %143 = memref.load %u_t0_loadview[%26, %27, %142] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %144 = arith.mulf %138, %140 : f32 -// CHECK-NEXT: %145 = arith.mulf %144, %143 : f32 -// CHECK-NEXT: %146 = arith.constant -8.333333e-02 : f32 -// CHECK-NEXT: %h_z_4 = arith.constant 1.342282e-02 : f32 -// CHECK-NEXT: %147 = arith.constant -2 : i64 -// CHECK-NEXT: %148 = "math.fpowi"(%h_z_4, %147) : (f32, i64) -> f32 -// CHECK-NEXT: %149 = arith.constant 2 : index -// CHECK-NEXT: %150 = arith.addi %25, %149 : index -// CHECK-NEXT: %151 = memref.load %u_t0_loadview[%26, %27, %150] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> -// CHECK-NEXT: %152 = arith.mulf %146, %148 : f32 -// CHECK-NEXT: %153 = arith.mulf %152, %151 : f32 -// CHECK-NEXT: %154 = arith.addf %123, %131 : f32 -// CHECK-NEXT: %155 = arith.addf %154, %137 : f32 -// CHECK-NEXT: %156 = arith.addf %155, %145 : f32 -// CHECK-NEXT: %157 = arith.addf %156, %153 : f32 -// CHECK-NEXT: %158 = arith.addf %73, %115 : f32 -// CHECK-NEXT: %159 = arith.addf %158, %157 : f32 +// CHECK-NEXT: %21 = arith.constant -2 : i64 +// CHECK-NEXT: %22 = "math.fpowi"(%h_x, %21) : (f32, i64) -> f32 +// CHECK-NEXT: %23 = arith.constant -1 : index +// CHECK-NEXT: %24 = arith.addi %14, %23 : index +// CHECK-NEXT: %25 = memref.load %u_t0_loadview[%24, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %26 = arith.mulf %20, %22 : f32 +// CHECK-NEXT: %27 = arith.mulf %26, %25 : f32 +// CHECK-NEXT: %28 = arith.addi %14, %step : index +// CHECK-NEXT: 
%29 = memref.load %u_t0_loadview[%28, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %30 = arith.mulf %26, %29 : f32 +// CHECK-NEXT: %31 = arith.constant -2.500000e+00 : f32 +// CHECK-NEXT: %32 = memref.load %u_t0_loadview[%14, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %33 = arith.mulf %31, %22 : f32 +// CHECK-NEXT: %34 = arith.mulf %33, %32 : f32 +// CHECK-NEXT: %35 = arith.constant -8.333333e-02 : f32 +// CHECK-NEXT: %36 = arith.constant -2 : index +// CHECK-NEXT: %37 = arith.addi %14, %36 : index +// CHECK-NEXT: %38 = memref.load %u_t0_loadview[%37, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %39 = arith.mulf %35, %22 : f32 +// CHECK-NEXT: %40 = arith.mulf %39, %38 : f32 +// CHECK-NEXT: %41 = arith.constant 2 : index +// CHECK-NEXT: %42 = arith.addi %14, %41 : index +// CHECK-NEXT: %43 = memref.load %u_t0_loadview[%42, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %44 = arith.mulf %39, %43 : f32 +// CHECK-NEXT: %45 = arith.addf %27, %30 : f32 +// CHECK-NEXT: %46 = arith.addf %45, %34 : f32 +// CHECK-NEXT: %47 = arith.addf %46, %40 : f32 +// CHECK-NEXT: %48 = arith.addf %47, %44 : f32 +// CHECK-NEXT: %49 = arith.addi %15, %23 : index +// CHECK-NEXT: %50 = memref.load %u_t0_loadview[%14, %49, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %51 = arith.mulf %26, %50 : f32 +// CHECK-NEXT: %52 = arith.addi %15, %step : index +// CHECK-NEXT: %53 = memref.load %u_t0_loadview[%14, %52, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %54 = arith.mulf %26, %53 : f32 +// CHECK-NEXT: %55 = memref.load %u_t0_loadview[%14, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %56 = arith.mulf %33, %55 : f32 +// CHECK-NEXT: %57 = arith.addi %15, %36 : index +// 
CHECK-NEXT: %58 = memref.load %u_t0_loadview[%14, %57, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %59 = arith.mulf %39, %58 : f32 +// CHECK-NEXT: %60 = arith.addi %15, %41 : index +// CHECK-NEXT: %61 = memref.load %u_t0_loadview[%14, %60, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %62 = arith.mulf %39, %61 : f32 +// CHECK-NEXT: %63 = arith.addf %51, %54 : f32 +// CHECK-NEXT: %64 = arith.addf %63, %56 : f32 +// CHECK-NEXT: %65 = arith.addf %64, %59 : f32 +// CHECK-NEXT: %66 = arith.addf %65, %62 : f32 +// CHECK-NEXT: %67 = arith.addi %13, %23 : index +// CHECK-NEXT: %68 = memref.load %u_t0_loadview[%14, %15, %67] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %69 = arith.mulf %26, %68 : f32 +// CHECK-NEXT: %70 = arith.addi %13, %step : index +// CHECK-NEXT: %71 = memref.load %u_t0_loadview[%14, %15, %70] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %72 = arith.mulf %26, %71 : f32 +// CHECK-NEXT: %73 = memref.load %u_t0_loadview[%14, %15, %13] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %74 = arith.mulf %33, %73 : f32 +// CHECK-NEXT: %75 = arith.addi %13, %36 : index +// CHECK-NEXT: %76 = memref.load %u_t0_loadview[%14, %15, %75] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %77 = arith.mulf %39, %76 : f32 +// CHECK-NEXT: %78 = arith.addi %13, %41 : index +// CHECK-NEXT: %79 = memref.load %u_t0_loadview[%14, %15, %78] : memref<154x154x154xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %80 = arith.mulf %39, %79 : f32 +// CHECK-NEXT: %81 = arith.addf %69, %72 : f32 +// CHECK-NEXT: %82 = arith.addf %81, %74 : f32 +// CHECK-NEXT: %83 = arith.addf %82, %77 : f32 +// CHECK-NEXT: %84 = arith.addf %83, %80 : f32 +// CHECK-NEXT: %85 = arith.addf %48, %66 : f32 +// CHECK-NEXT: %86 = arith.addf %85, %84 : f32 // 
CHECK-NEXT: %a = arith.constant 9.000000e-01 : f32 -// CHECK-NEXT: %160 = arith.mulf %159, %a : f32 -// CHECK-NEXT: %161 = arith.addf %31, %160 : f32 -// CHECK-NEXT: %dt_1 = arith.constant 6.717825e-07 : f32 -// CHECK-NEXT: %162 = arith.mulf %161, %dt_1 : f32 -// CHECK-NEXT: memref.store %162, %u_t1_storeview[%26, %27, %25] : memref<150x150x150xf32, strided<[24964, 158, 1], offset: 100492>> +// CHECK-NEXT: %87 = arith.mulf %86, %a : f32 +// CHECK-NEXT: %88 = arith.addf %19, %87 : f32 +// CHECK-NEXT: %89 = arith.mulf %88, %dt : f32 +// CHECK-NEXT: memref.store %89, %u_t1_storeview[%14, %15, %13] : memref<150x150x150xf32, strided<[24964, 158, 1], offset: 100492>> // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index, index, index, index) -> () // CHECK-NEXT: scf.yield // CHECK-NEXT: }) : (index, index, index, index, index, index) -> () // CHECK-NEXT: scf.yield %u_t1, %u_t0 : memref<158x158x158xf32>, memref<158x158x158xf32> // CHECK-NEXT: } -// CHECK-NEXT: %163 = func.call @timer_end(%0) : (f64) -> f64 -// CHECK-NEXT: "llvm.store"(%163, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () +// CHECK-NEXT: %90 = func.call @timer_end(%0) : (f64) -> f64 +// CHECK-NEXT: "llvm.store"(%90, %timers) <{"ordering" = 0 : i64}> : (f64, !llvm.ptr) -> () // CHECK-NEXT: func.return // CHECK-NEXT: } // CHECK-NEXT: func.func private @timer_start() -> f64 From 9f1c990b313df933103fd985a4c802c7aa5993cc Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 20 Aug 2024 17:57:46 +0100 Subject: [PATCH 11/25] Update pytests. 
--- tests/test_xdsl_op_correctness.py | 10 ++++++---- tests/test_xdsl_operator.py | 5 +++-- tests/test_xdsl_passes.py | 5 ++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_xdsl_op_correctness.py b/tests/test_xdsl_op_correctness.py index f0cb28d794..eb2a10c890 100644 --- a/tests/test_xdsl_op_correctness.py +++ b/tests/test_xdsl_op_correctness.py @@ -91,20 +91,22 @@ def test_u_and_v_conversion(): scffor_ops = list(ops[6].regions[0].blocks[0].ops) - assert len(scffor_ops) == 7 + assert len(scffor_ops) == 9 # First assert isinstance(scffor_ops[0], LoadOp) assert isinstance(scffor_ops[1], LoadOp) assert isinstance(scffor_ops[2], ApplyOp) assert isinstance(scffor_ops[3], StoreOp) + assert isinstance(scffor_ops[4], LoadOp) # Second - assert isinstance(scffor_ops[4], ApplyOp) - assert isinstance(scffor_ops[5], StoreOp) + assert isinstance(scffor_ops[5], ApplyOp) + assert isinstance(scffor_ops[6], StoreOp) + assert isinstance(scffor_ops[7], LoadOp) # Yield - assert isinstance(scffor_ops[6], Yield) + assert isinstance(scffor_ops[8], Yield) assert type(ops[7] == Call) assert type(ops[8] == StoreOp) diff --git a/tests/test_xdsl_operator.py b/tests/test_xdsl_operator.py index 9fa0fee21a..f06df254de 100644 --- a/tests/test_xdsl_operator.py +++ b/tests/test_xdsl_operator.py @@ -32,15 +32,16 @@ def test_create_xdsl_operator(): assert type(ops[6] == For) scffor_ops = list(ops[6].regions[0].blocks[0].ops) - assert len(scffor_ops) == 4 + assert len(scffor_ops) == 5 # First assert isinstance(scffor_ops[0], LoadOp) assert isinstance(scffor_ops[1], ApplyOp) assert isinstance(scffor_ops[2], StoreOp) + assert isinstance(scffor_ops[3], LoadOp) # Yield - assert isinstance(scffor_ops[3], Yield) + assert isinstance(scffor_ops[4], Yield) assert type(ops[7] == Call) assert type(ops[8] == StoreOp) diff --git a/tests/test_xdsl_passes.py b/tests/test_xdsl_passes.py index 9fce974b8d..ff937d92b2 100644 --- a/tests/test_xdsl_passes.py +++ b/tests/test_xdsl_passes.py @@ 
-90,10 +90,13 @@ def test_xdsl_III(): scffor_ops = list(ops[6].regions[0].blocks[0].ops) + assert len(scffor_ops) == 5 + assert isinstance(scffor_ops[0], LoadOp) assert isinstance(scffor_ops[1], ApplyOp) assert isinstance(scffor_ops[2], StoreOp) - assert isinstance(scffor_ops[3], Yield) + assert isinstance(scffor_ops[3], LoadOp) + assert isinstance(scffor_ops[4], Yield) assert type(ops[7] == Call) assert type(ops[8] == StoreOp) From 233b70ab008363b4786288f9c56f3092d4facb09 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 12:03:57 +0100 Subject: [PATCH 12/25] Bump to xDSL main. --- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index a187172c3e..47b2cb718f 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 + pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index 7cd7d7cbb1..04bd3d8208 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 + pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index 6dd145604e..bff3de5933 100644 
--- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 + pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index 39bc907225..9b80ddf909 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 + pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 796882d73e..9f9d43106f 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@935675efd470505028466917126f8cc8f64ce4e3 + pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed - name: Test no-MPI, no-Openmp run: | From ef1fbf56c6e44e5652e7b1ec60ec46526aab8baa Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 14:22:57 +0100 Subject: [PATCH 13/25] Bump to xDSL supporting inplace. 
--- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 47b2cb718f..35b24eec97 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed + pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index 04bd3d8208..d952fb4765 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed + pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index bff3de5933..b4bc3c34f3 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed + pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index 9b80ddf909..60471ccdc8 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 
+36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed + pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 9f9d43106f..9ab998511f 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@bd700e9665c040d478a4f3bfa286ede66216b5ed + pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda - name: Test no-MPI, no-Openmp run: | From 6e0673ca3a2c36f927cd8f53722b554b78a55216 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 14:30:46 +0100 Subject: [PATCH 14/25] Little dictionary fix? --- devito/ir/xdsl_iet/cluster_to_ssa.py | 5 ++--- tests/test_xdsl_op_correctness.py | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/devito/ir/xdsl_iet/cluster_to_ssa.py b/devito/ir/xdsl_iet/cluster_to_ssa.py index 48d2489d12..f540e405a5 100644 --- a/devito/ir/xdsl_iet/cluster_to_ssa.py +++ b/devito/ir/xdsl_iet/cluster_to_ssa.py @@ -386,13 +386,12 @@ def build_stencil_step(self, dim: SteppingDimension, eq: LoweredEq) -> None: lb = stencil.IndexAttr.get(*([0] * len(shape))) ub = stencil.IndexAttr.get(*shape) - store = stencil.StoreOp.get( apply.res[0], - self.function_values[self.out_time_buffer], + self.block_args[self.out_time_buffer], stencil.StencilBoundsAttr(zip(lb, ub)), ) - load = stencil.LoadOp.get(self.function_values[self.out_time_buffer]) + load = stencil.LoadOp.get(self.block_args[self.out_time_buffer]) load.res.name_hint = f"{write_function.name}_t{self.out_time_buffer[1]}_temp" # noqa self.temps[self.out_time_buffer] = load.res diff --git a/tests/test_xdsl_op_correctness.py
b/tests/test_xdsl_op_correctness.py index eb2a10c890..4dd5b7e758 100644 --- a/tests/test_xdsl_op_correctness.py +++ b/tests/test_xdsl_op_correctness.py @@ -130,9 +130,6 @@ def test_symbol_I(): assert ops[0].result.name_hint == a.name assert type(ops[0] == Return) - -# This test should fail, as we are trying to use an inplace operation -@pytest.mark.xfail(reason="Cannot store to a field that is loaded from") def test_inplace(): # Define a simple Devito Operator grid = Grid(shape=(3, 3)) From b9b1bbcbfcdc10fd8b78fa0b15e86a4f32cece8c Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 14:32:58 +0100 Subject: [PATCH 15/25] Flake --- tests/test_xdsl_op_correctness.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_xdsl_op_correctness.py b/tests/test_xdsl_op_correctness.py index 4dd5b7e758..6f8c5efed9 100644 --- a/tests/test_xdsl_op_correctness.py +++ b/tests/test_xdsl_op_correctness.py @@ -130,6 +130,7 @@ def test_symbol_I(): assert ops[0].result.name_hint == a.name assert type(ops[0] == Return) + def test_inplace(): # Define a simple Devito Operator grid = Grid(shape=(3, 3)) From 26218f263b0025d3fee53811f88c3f2183dfc49e Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 14:41:41 +0100 Subject: [PATCH 16/25] Try this dictionary Some more comments would seem useful. 
--- devito/ir/xdsl_iet/cluster_to_ssa.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devito/ir/xdsl_iet/cluster_to_ssa.py b/devito/ir/xdsl_iet/cluster_to_ssa.py index f540e405a5..240d32ec09 100644 --- a/devito/ir/xdsl_iet/cluster_to_ssa.py +++ b/devito/ir/xdsl_iet/cluster_to_ssa.py @@ -386,12 +386,12 @@ def build_stencil_step(self, dim: SteppingDimension, eq: LoweredEq) -> None: lb = stencil.IndexAttr.get(*([0] * len(shape))) ub = stencil.IndexAttr.get(*shape) - store = stencil.StoreOp.get( + stencil.StoreOp.get( apply.res[0], - self.block_args[self.out_time_buffer], + self.function_args[self.out_time_buffer], stencil.StencilBoundsAttr(zip(lb, ub)), ) - load = stencil.LoadOp.get(self.block_args[self.out_time_buffer]) + load = stencil.LoadOp.get(self.function_args[self.out_time_buffer]) load.res.name_hint = f"{write_function.name}_t{self.out_time_buffer[1]}_temp" # noqa self.temps[self.out_time_buffer] = load.res From 89e774ce100a4ff5fb62fb5f295be499376721df Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 14:52:49 +0100 Subject: [PATCH 17/25] Dictionary tweak. 
--- devito/ir/xdsl_iet/cluster_to_ssa.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/devito/ir/xdsl_iet/cluster_to_ssa.py b/devito/ir/xdsl_iet/cluster_to_ssa.py index 48d2489d12..11c4d8a718 100644 --- a/devito/ir/xdsl_iet/cluster_to_ssa.py +++ b/devito/ir/xdsl_iet/cluster_to_ssa.py @@ -198,7 +198,7 @@ def _visit_math_nodes(self, dim: SteppingDimension, node: Expr, if output_indexed is not None: space_offsets = ([node.indices[d] - output_indexed.indices[d] for d in node.function.space_dimensions]) - temp = self.function_values[(node.function, time_offset)] + temp = self.apply_temps[(node.function, time_offset)] access = stencil.AccessOp.get(temp, space_offsets) return access.res # Otherwise, generate a load op @@ -378,8 +378,6 @@ def build_stencil_step(self, dim: SteppingDimension, eq: LoweredEq) -> None: apply_arg.name_hint = apply_op.name_hint.replace("temp", "blk") self.apply_temps = {k: v for k, v in zip(read_functions, apply.region.block.args)} - # Update the function values with the new temps - self.function_values |= self.apply_temps with ImplicitBuilder(apply.region.block): stencil.ReturnOp.get([self._visit_math_nodes(dim, eq.rhs, eq.lhs)]) From a2a69225f2f0bbf35f105336ce42a0058afdcecf Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 15:01:10 +0100 Subject: [PATCH 18/25] Sync --- devito/ir/xdsl_iet/cluster_to_ssa.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/devito/ir/xdsl_iet/cluster_to_ssa.py b/devito/ir/xdsl_iet/cluster_to_ssa.py index f2f2383bb6..11c4d8a718 100644 --- a/devito/ir/xdsl_iet/cluster_to_ssa.py +++ b/devito/ir/xdsl_iet/cluster_to_ssa.py @@ -384,12 +384,13 @@ def build_stencil_step(self, dim: SteppingDimension, eq: LoweredEq) -> None: lb = stencil.IndexAttr.get(*([0] * len(shape))) ub = stencil.IndexAttr.get(*shape) - stencil.StoreOp.get( + + store = stencil.StoreOp.get( apply.res[0], - self.function_args[self.out_time_buffer], + 
self.function_values[self.out_time_buffer], stencil.StencilBoundsAttr(zip(lb, ub)), ) - load = stencil.LoadOp.get(self.function_args[self.out_time_buffer]) + load = stencil.LoadOp.get(self.function_values[self.out_time_buffer]) load.res.name_hint = f"{write_function.name}_t{self.out_time_buffer[1]}_temp" # noqa self.temps[self.out_time_buffer] = load.res From 5366f327d7ce57e423d32a0b5ac06d9ae49dd496 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 17:11:21 +0100 Subject: [PATCH 19/25] Revert more subtle inplace test and present a working one.. --- tests/test_xdsl_op_correctness.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_xdsl_op_correctness.py b/tests/test_xdsl_op_correctness.py index 6f8c5efed9..2934770e97 100644 --- a/tests/test_xdsl_op_correctness.py +++ b/tests/test_xdsl_op_correctness.py @@ -131,7 +131,22 @@ def test_symbol_I(): assert type(ops[0] == Return) -def test_inplace(): +def test_inplace_I(): + # Define a simple Devito Operator + grid = Grid(shape=(3, 3)) + u = TimeFunction(name="u", grid=grid, time_order=2) + + u.data[:] = 0.0001 + + eq0 = Eq(u, u + 2) + + xdsl_op = Operator([eq0], opt="xdsl") + xdsl_op.apply(time_M=5, dt=0.1) + + +# This test should fail, as we are trying to use an inplace operation with some dependencies +@pytest.mark.xfail(reason="Cannot store to a field that is loaded from") +def test_inplace_II(): # Define a simple Devito Operator grid = Grid(shape=(3, 3)) u = TimeFunction(name='u', grid=grid, time_order=2) From fb54a023db42938f4b5edfde6639399d78c6b989 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 17:13:10 +0100 Subject: [PATCH 20/25] Flake. 
--- tests/test_xdsl_op_correctness.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_xdsl_op_correctness.py b/tests/test_xdsl_op_correctness.py index 2934770e97..8e74b23992 100644 --- a/tests/test_xdsl_op_correctness.py +++ b/tests/test_xdsl_op_correctness.py @@ -144,7 +144,8 @@ def test_inplace_I(): xdsl_op.apply(time_M=5, dt=0.1) -# This test should fail, as we are trying to use an inplace operation with some dependencies +# This test should fail, as we are trying to use an inplace operation with some +# dependencies @pytest.mark.xfail(reason="Cannot store to a field that is loaded from") def test_inplace_II(): # Define a simple Devito Operator From 3eafa472e67483fc04dccf925d7ad3e0cda3ebd1 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 17:29:26 +0100 Subject: [PATCH 21/25] Lift another xfail --- tests/test_xdsl_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_xdsl_base.py b/tests/test_xdsl_base.py index 72ddfcb16a..4cba6d9a7a 100644 --- a/tests/test_xdsl_base.py +++ b/tests/test_xdsl_base.py @@ -948,7 +948,6 @@ def test_function_III(): assert np.isclose(norm(v), devito_norm_v) -@pytest.mark.xfail(reason="Operation does not verify: Cannot Load and Store the same field!") # noqa def test_function_IV(): # Define a Devito Operator with multiple eqs grid = Grid(shape=(4, 4)) From 06293fee2a0630ffac8b5f1f6c6ee82786f37028 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 17:47:52 +0100 Subject: [PATCH 22/25] Implement return type conversion. 
--- devito/ir/xdsl_iet/cluster_to_ssa.py | 37 +++++++++++++-------- tests/test_xdsl_base.py | 49 +++++++++++++--------------- 2 files changed, 47 insertions(+), 39 deletions(-) diff --git a/devito/ir/xdsl_iet/cluster_to_ssa.py b/devito/ir/xdsl_iet/cluster_to_ssa.py index 37713b5322..66a9741c35 100644 --- a/devito/ir/xdsl_iet/cluster_to_ssa.py +++ b/devito/ir/xdsl_iet/cluster_to_ssa.py @@ -287,7 +287,7 @@ def _visit_math_nodes(self, dim: SteppingDimension, node: Expr, SSAargs = (self._visit_math_nodes(dim, arg, output_indexed) for arg in node.args) return reduce(lambda x, y : arith.AndI(x, y).result, SSAargs) - + # Trigonometric functions elif isinstance(node, sin): assert len(node.args) == 1, "Expected single argument for sin." @@ -298,13 +298,13 @@ def _visit_math_nodes(self, dim: SteppingDimension, node: Expr, assert len(node.args) == 1, "Expected single argument for cos." return math.CosOp(self._visit_math_nodes(dim, node.args[0], output_indexed)).result - + elif isinstance(node, tan): assert len(node.args) == 1, "Expected single argument for TanOp." 
- + return math.TanOp(self._visit_math_nodes(dim, node.args[0], output_indexed)).result - + elif isinstance(node, Relational): if isinstance(node, GreaterThan): mnemonic = "sge" @@ -382,7 +382,20 @@ def build_stencil_step(self, dim: SteppingDimension, eq: LoweredEq) -> None: self.function_values |= self.apply_temps with ImplicitBuilder(apply.region.block): - stencil.ReturnOp.get([self._visit_math_nodes(dim, eq.rhs, eq.lhs)]) + result = self._visit_math_nodes(dim, eq.rhs, eq.lhs) + expected_type = apply.res[0].type.get_element_type() + match expected_type: + case result.type: + pass + case builtin.f32: + if result.type == IndexType(): + result = arith.IndexCastOp(result, builtin.i64).result + result = arith.SIToFPOp(result, builtin.f32).result + case builtin.IndexType: + result = arith.IndexCastOp(result, IndexType()).result + case _: + raise Exception(f"Unexpected result type {type(result)}") + stencil.ReturnOp.get([result]) lb = stencil.IndexAttr.get(*([0] * len(shape))) ub = stencil.IndexAttr.get(*shape) @@ -439,7 +452,6 @@ def build_condition(self, dim: SteppingDimension, eq: BooleanFunction): self.build_generic_step_expression(dim, eq) scf.Yield() - def build_time_loop( self, eqs: list[Any], step_dim: SteppingDimension, **kwargs ): @@ -450,7 +462,7 @@ def build_time_loop( ub = iet_ssa.LoadSymbolic.get( step_dim.symbolic_max._C_name, IndexType() ) - + one = arith.Constant.from_int_and_width(1, IndexType()) # Devito iterates from time_m to time_M *inclusive*, MLIR only takes @@ -497,7 +509,7 @@ def build_time_loop( for i, (f, t) in enumerate(self.time_buffers) } self.function_values |= self.block_args - + # Name the block argument for debugging for (f, t), arg in self.block_args.items(): arg.name_hint = f"{f.name}_t{t}" @@ -513,8 +525,7 @@ def build_time_loop( def lower_devito_Eqs(self, eqs: list[Any], **kwargs): # Lower devito Equations to xDSL - - + for eq in eqs: lowered = self.operator._lower_exprs(as_tuple(eq), **kwargs) if isinstance(eq, Eq): @@ -546,7 
+557,7 @@ def _lower_injection(self, eqs: list[LoweredEq]): lb = arith.Constant.from_int_and_width(int(lower), IndexType()) else: raise NotImplementedError(f"Lower bound of type {type(lower)} not supported") - + try: name = interval.dim.symbolic_min.name except: @@ -633,7 +644,7 @@ def convert(self, eqs: Iterable[Eq], **kwargs) -> ModuleOp: # Instantiate the module. self.function_values: dict[tuple[Function, int], SSAValue] = {} self.symbol_values: dict[str, SSAValue] = {} - + module = ModuleOp(Region([block := Block([])])) with ImplicitBuilder(block): # Get all functions used in the equations @@ -647,7 +658,7 @@ def convert(self, eqs: Iterable[Eq], **kwargs) -> ModuleOp: functions.add(f.function) elif isinstance(eq, Injection): - + functions.add(eq.field.function) for f in retrieve_functions(eq.expr): if isinstance(f, PointSource): diff --git a/tests/test_xdsl_base.py b/tests/test_xdsl_base.py index 39aa97828b..23eeaf97ce 100644 --- a/tests/test_xdsl_base.py +++ b/tests/test_xdsl_base.py @@ -972,6 +972,20 @@ def test_function_IV(): assert np.isclose(norm(u), devito_norm_u) +def test_function_V(): + grid = Grid(shape=(5, 5)) + x, y = grid.dimensions + + f = Function(name="f", grid=grid) + + eqns = [Eq(f, 2)] + + op = Operator(eqns, opt="xdsl") + op.apply() + + assert np.all(f.data == 2) + + class TestTrigonometric(object): @pytest.mark.parametrize('deg, exp', ([90.0, 3.5759869], [30.0, 3.9521265], @@ -1028,37 +1042,20 @@ def test_tan(self, deg, exp): assert np.isclose(norm(u), exp, rtol=1e-4) -class TestOperatorUnsupported(object): +def test_forward_assignment(): + # simple forward assignment - @pytest.mark.xfail(reason="stencil.return operation does not verify for i64") - def test_forward_assignment(self): - # simple forward assignment - - grid = Grid(shape=(4, 4)) - u = TimeFunction(name="u", grid=grid, space_order=2) - u.data[:, :, :] = 0 - - eq0 = Eq(u.forward, 1) - - op = Operator([eq0], opt='xdsl') - - op.apply(time_M=1) - - assert np.isclose(norm(u), 5.6584, 
rtol=0.001) - - @pytest.mark.xfail(reason="stencil.return operation does not verify for i64") - def test_function(self): - grid = Grid(shape=(5, 5)) - x, y = grid.dimensions + grid = Grid(shape=(4, 4)) + u = TimeFunction(name="u", grid=grid, space_order=2) + u.data[:, :, :] = 0 - f = Function(name="f", grid=grid) + eq0 = Eq(u.forward, 1) - eqns = [Eq(f, 2)] + op = Operator([eq0], opt='xdsl') - op = Operator(eqns, opt='xdsl') - op.apply() + op.apply(time_M=1) - assert np.all(f.data == 4) + assert np.isclose(norm(u), 5.6584, rtol=0.001) class TestElastic(): From 74746f56f7b1e91037aff9a22b841979974b3ffa Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 21 Aug 2024 17:55:59 +0100 Subject: [PATCH 23/25] Update to PR. --- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index 35b24eec97..ead38f6031 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda + pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index d952fb4765..a95283c43a 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda + pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 - name: Test with MPI + openmp 
run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index b4bc3c34f3..1f8718fa5b 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda + pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index 60471ccdc8..f8f2e7493c 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda + pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index 9ab998511f..a4d0b6ffcf 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@c86953132b9bd52b6b9dbbd3463bde52c5c15fda + pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 - name: Test no-MPI, no-Openmp run: | From 51e6f97ded417939105087eabc5a7fe63f730412 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Fri, 30 Aug 2024 14:47:02 +0100 Subject: [PATCH 24/25] GPU pipeline canonicalization --- devito/xdsl_core/xdsl_gpu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devito/xdsl_core/xdsl_gpu.py b/devito/xdsl_core/xdsl_gpu.py index 9f281015b3..c84cf14492 100644 --- a/devito/xdsl_core/xdsl_gpu.py +++ b/devito/xdsl_core/xdsl_gpu.py @@ -140,7 +140,10 @@ def 
_jit_compile(self): def generate_XDSL_GPU_PIPELINE(): passes = [ + "canonicalize", + "cse", "shape-inference", + "stencil-bufferize", "convert-stencil-to-ll-mlir", "reconcile-unrealized-casts", "printf-to-llvm", From 72c909cf72923fee0b5102f3e21b29186fc72675 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Fri, 30 Aug 2024 15:14:47 +0100 Subject: [PATCH 25/25] Bump to `main`. --- .github/workflows/ci-lit.yml | 2 +- .github/workflows/ci-mlir-mpi-openmp.yml | 2 +- .github/workflows/ci-mlir-mpi.yml | 2 +- .github/workflows/ci-mlir-openmp.yml | 2 +- .github/workflows/ci-mlir.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-lit.yml b/.github/workflows/ci-lit.yml index ead38f6031..75c2f350db 100644 --- a/.github/workflows/ci-lit.yml +++ b/.github/workflows/ci-lit.yml @@ -27,7 +27,7 @@ jobs: - name: Install requirements and xDSL run: | - pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 + pip install git+https://github.com/xdslproject/xdsl@48b530615ab877e980d1f6339c18e63970011311 pip install -e .[tests] - name: Execute lit tests diff --git a/.github/workflows/ci-mlir-mpi-openmp.yml b/.github/workflows/ci-mlir-mpi-openmp.yml index a95283c43a..900fbbc13e 100644 --- a/.github/workflows/ci-mlir-mpi-openmp.yml +++ b/.github/workflows/ci-mlir-mpi-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 + pip install git+https://github.com/xdslproject/xdsl@48b530615ab877e980d1f6339c18e63970011311 - name: Test with MPI + openmp run: | diff --git a/.github/workflows/ci-mlir-mpi.yml b/.github/workflows/ci-mlir-mpi.yml index 1f8718fa5b..d699dda4ef 100644 --- a/.github/workflows/ci-mlir-mpi.yml +++ b/.github/workflows/ci-mlir-mpi.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install 
git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 + pip install git+https://github.com/xdslproject/xdsl@48b530615ab877e980d1f6339c18e63970011311 - name: Test with MPI - no Openmp run: | diff --git a/.github/workflows/ci-mlir-openmp.yml b/.github/workflows/ci-mlir-openmp.yml index f8f2e7493c..f78144d33a 100644 --- a/.github/workflows/ci-mlir-openmp.yml +++ b/.github/workflows/ci-mlir-openmp.yml @@ -36,7 +36,7 @@ jobs: run: | pip install -e .[tests] pip install mpi4py - pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 + pip install git+https://github.com/xdslproject/xdsl@48b530615ab877e980d1f6339c18e63970011311 - name: Test no-MPI, Openmp run: | diff --git a/.github/workflows/ci-mlir.yml b/.github/workflows/ci-mlir.yml index a4d0b6ffcf..fc16c030f2 100644 --- a/.github/workflows/ci-mlir.yml +++ b/.github/workflows/ci-mlir.yml @@ -35,7 +35,7 @@ jobs: - name: Install requirements and xDSL run: | pip install -e .[tests] - pip install git+https://github.com/xdslproject/xdsl@c0bd8d3fb67b63950601a43b0779d2c35e29f3b7 + pip install git+https://github.com/xdslproject/xdsl@48b530615ab877e980d1f6339c18e63970011311 - name: Test no-MPI, no-Openmp run: |