From 8087f95720108640e4b78a9b6ef8ebf0f47c6fb7 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 1 Aug 2023 13:32:32 +0100 Subject: [PATCH 1/3] Add tiling. --- devito/operator/xdsl_operator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index 2ffe000863..ff2283166a 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -53,9 +53,9 @@ # gpu-launch-sink-index-computations seemed to have no impact MLIR_GPU_PIPELINE = '"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{parallel-loop-tile-sizes=128,1,1},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,fold-memref-alias-ops,expand-strided-metadata,lower-affine,gpu-kernel-outlining,canonicalize,cse,convert-arith-to-llvm{index-bitwidth=64},finalize-memref-to-llvm{index-bitwidth=64},convert-scf-to-cf,convert-cf-to-llvm{index-bitwidth=64},canonicalize,cse,gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' -XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir,printf-to-llvm" +XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{tile-sizes=64,64},printf-to-llvm" XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},printf-to-llvm" -XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir,dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' +XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{tile-sizes=64,64}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' class XDSLOperator(Operator): From 63ab369f3f0ab9923b745f9f40b3646f9a644a2d Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Tue, 1 Aug 2023 13:34:06 +0100 Subject: [PATCH 2/3] Add proper quoting. --- devito/operator/xdsl_operator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index ff2283166a..292efa3b62 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -53,8 +53,8 @@ # gpu-launch-sink-index-computations seemed to have no impact MLIR_GPU_PIPELINE = '"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{parallel-loop-tile-sizes=128,1,1},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,fold-memref-alias-ops,expand-strided-metadata,lower-affine,gpu-kernel-outlining,canonicalize,cse,convert-arith-to-llvm{index-bitwidth=64},finalize-memref-to-llvm{index-bitwidth=64},convert-scf-to-cf,convert-cf-to-llvm{index-bitwidth=64},canonicalize,cse,gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' -XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{tile-sizes=64,64},printf-to-llvm" -XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},printf-to-llvm" +XDSL_CPU_PIPELINE = '"stencil-shape-inference,convert-stencil-to-ll-mlir{tile-sizes=64,64},printf-to-llvm"' +XDSL_GPU_PIPELINE = '"stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},printf-to-llvm"' XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{tile-sizes=64,64}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' From e15ce9709edc65bd156ad067b15482ba39a68124 Mon Sep 17 00:00:00 2001 From: Emilien Bauer Date: Wed, 2 Aug 2023 13:03:59 +0100 Subject: [PATCH 3/3] Add dimensionality-1 tiling dimensions logic. --- devito/operator/xdsl_operator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index 292efa3b62..1450c95917 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -53,9 +53,9 @@ # gpu-launch-sink-index-computations seemed to have no impact MLIR_GPU_PIPELINE = '"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{parallel-loop-tile-sizes=128,1,1},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,fold-memref-alias-ops,expand-strided-metadata,lower-affine,gpu-kernel-outlining,canonicalize,cse,convert-arith-to-llvm{index-bitwidth=64},finalize-memref-to-llvm{index-bitwidth=64},convert-scf-to-cf,convert-cf-to-llvm{index-bitwidth=64},canonicalize,cse,gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' -XDSL_CPU_PIPELINE = '"stencil-shape-inference,convert-stencil-to-ll-mlir{tile-sizes=64,64},printf-to-llvm"' +XDSL_CPU_PIPELINE = lambda nb_tiled_dims: f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{tile-sizes={",".join(["64"]*nb_tiled_dims)}}},printf-to-llvm"' XDSL_GPU_PIPELINE = '"stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},printf-to-llvm"' -XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{tile-sizes=64,64}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' +XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims: f'"dmp-decompose-2d{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{tile-sizes={",".join(["64"]*nb_tiled_dims)}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' class XDSLOperator(Operator): @@ -114,7 +114,9 @@ def _jit_compile(self): Printer(stream=module_str).print(self._module) module_str = module_str.getvalue() - xdsl_pipeline = XDSL_CPU_PIPELINE + to_tile = len(list(filter(lambda s : str(s) in ["x", "y", "z"], self.dimensions)))-1 + + xdsl_pipeline = XDSL_CPU_PIPELINE(to_tile) mlir_pipeline = MLIR_CPU_PIPELINE if is_omp: @@ -126,7 +128,7 @@ def _jit_compile(self): # reduce the domain of the computation (as devito has already done that for us) slices = ','.join(str(x) for x in shape) decomp = f"{{strategy=2d-grid slices={slices} restrict_domain=false}}" - xdsl_pipeline = XDSL_MPI_PIPELINE(decomp) + xdsl_pipeline = XDSL_MPI_PIPELINE(decomp, to_tile) elif is_gpu: xdsl_pipeline = XDSL_GPU_PIPELINE mlir_pipeline = MLIR_GPU_PIPELINE