diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index 8e6c8e2614..27d0419350 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -53,9 +53,9 @@ # gpu-launch-sink-index-computations seemed to have no impact MLIR_GPU_PIPELINE = lambda block_sizes: f'"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,lower-affine, canonicalize,cse, fold-memref-alias-ops, gpu-launch-sink-index-computations, gpu-kernel-outlining, canonicalize{{region-simplify}},cse,fold-memref-alias-ops,expand-strided-metadata,lower-affine,canonicalize,cse,func.func(gpu-async-region),canonicalize,cse,convert-arith-to-llvm{{index-bitwidth=64}},convert-scf-to-cf,convert-cf-to-llvm{{index-bitwidth=64}},canonicalize,cse,convert-func-to-llvm{{use-bare-ptr-memref-call-conv}},gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' -XDSL_CPU_PIPELINE = lambda tile_sizes, collapse: f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{{"tile-sizes="+",".join(["64"]*tile_sizes) if tile_sizes > 0 else ""}}},printf-to-llvm"' +XDSL_CPU_PIPELINE = lambda nb_tiled_dims, collapse: f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{{"tile-sizes="+",".join(["64"]*nb_tiled_dims) if nb_tiled_dims > 0 else ""}}},printf-to-llvm"' XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm" -XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims, collapse: f'"dmp-decompose-2d{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{tile-sizes={",".join(["64"]*nb_tiled_dims)} collapse={collapse}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' +XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims, collapse: f'"dmp-decompose{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{{"tile-sizes="+",".join(["64"]*nb_tiled_dims) if nb_tiled_dims > 0 else ""} collapse={collapse}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' class XDSLOperator(Operator): @@ -116,9 +116,11 @@ def _jit_compile(self): Printer(stream=module_str).print(self._module) module_str = module_str.getvalue() - to_tile = len(list(filter(lambda s : str(s) in ["x", "y", "z"], self.dimensions)))-1 + prob_dim = len(list(filter(lambda s : str(s) in ["x", "y", "z"], self.dimensions))) + to_tile = 0 if prob_dim == 2 else prob_dim-1 + collapse = prob_dim-1 - xdsl_pipeline = XDSL_CPU_PIPELINE(to_tile, to_tile) + xdsl_pipeline = XDSL_CPU_PIPELINE(to_tile, collapse) mlir_pipeline = MLIR_CPU_PIPELINE block_sizes: list[int] = [min(target, self._jit_kernel_constants.get(f"{dim}_size", 1)) for target, dim in zip([32, 4, 8], ["x", "y", "z"])]