Skip to content

Commit

Permalink
Add more sensible and resilient tile sizes.
Browse files Browse the repository at this point in the history
  • Loading branch information
PapyChacal committed Aug 4, 2023
1 parent 693479e commit d845804
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions devito/operator/xdsl_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
MLIR_CPU_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,convert-scf-to-cf,convert-math-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv},finalize-memref-to-llvm,canonicalize,cse)"'
MLIR_OPENMP_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,finalize-memref-to-llvm,loop-invariant-code-motion,canonicalize,cse,convert-scf-to-openmp,finalize-memref-to-llvm,convert-scf-to-cf,convert-func-to-llvm{use-bare-ptr-memref-call-conv},convert-openmp-to-llvm,convert-math-to-llvm,reconcile-unrealized-casts,canonicalize,cse)"'
# gpu-launch-sink-index-computations seemed to have no impact
MLIR_GPU_PIPELINE = '"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{parallel-loop-tile-sizes=128,1,1},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,fold-memref-alias-ops,expand-strided-metadata,lower-affine,canonicalize,cse,gpu-kernel-outlining,func.func(gpu-async-region),canonicalize,cse,convert-arith-to-llvm{index-bitwidth=64},finalize-memref-to-llvm{index-bitwidth=64},convert-scf-to-cf,convert-cf-to-llvm{index-bitwidth=64},canonicalize,cse,convert-func-to-llvm{use-bare-ptr-memref-call-conv},gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"'
def MLIR_GPU_PIPELINE(block_sizes):
    """Return the quoted MLIR pass-pipeline string used for GPU targets.

    The name stays upper-case because callers invoke it like the other
    ``*_PIPELINE`` module constants, e.g. ``MLIR_GPU_PIPELINE(block_sizes)``.

    Parameters
    ----------
    block_sizes : str or iterable of int
        Tile sizes forwarded to ``scf-parallel-loop-tiling`` through its
        ``parallel-loop-tile-sizes`` option. A string is inserted verbatim
        (e.g. ``"32,4,8"``); any other iterable is joined with commas first,
        so ``[32, 4, 8]`` yields the same pipeline as ``"32,4,8"``.

    Returns
    -------
    str
        The pipeline wrapped in literal double quotes, i.e.
        ``'"builtin.module(<pass>,<pass>,...)"'``.
    """
    # Accept a sequence of sizes as well as the pre-joined string: formatting
    # a list directly would inject its repr ("[32, 4, 8]") into the option.
    if not isinstance(block_sizes, str):
        block_sizes = ','.join(str(size) for size in block_sizes)

    # One entry per top-level pass, in execution order. Order and spelling are
    # significant: the joined result must stay byte-identical for str input.
    passes = (
        'test-math-algebraic-simplification',
        f'scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}}',
        'func.func(gpu-map-parallel-loops)',
        'convert-parallel-loops-to-gpu',
        'fold-memref-alias-ops',
        'expand-strided-metadata',
        'lower-affine',
        'canonicalize',
        'cse',
        'gpu-kernel-outlining',
        'func.func(gpu-async-region)',
        'canonicalize',
        'cse',
        'convert-arith-to-llvm{index-bitwidth=64}',
        'finalize-memref-to-llvm{index-bitwidth=64}',
        'convert-scf-to-cf',
        'convert-cf-to-llvm{index-bitwidth=64}',
        'canonicalize',
        'cse',
        'convert-func-to-llvm{use-bare-ptr-memref-call-conv}',
        # Nested pipeline: a single entry even though it contains commas.
        'gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,'
        'canonicalize,gpu-to-cubin)',
        'gpu-to-llvm',
        'canonicalize',
        'cse',
    )
    return '"builtin.module(' + ','.join(passes) + ')"'

XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir,reconcile-unrealized-casts,printf-to-llvm"
XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm"
Expand Down Expand Up @@ -117,6 +117,9 @@ def _jit_compile(self):
xdsl_pipeline = XDSL_CPU_PIPELINE
mlir_pipeline = MLIR_CPU_PIPELINE

block_sizes: list[int] = [min(target, self._jit_kernel_constants.get(dim, 1)) for target, dim in zip([32, 4, 8], ["x", "y", "z"])]
block_sizes = ','.join(str(bs) for bs in block_sizes)

if is_omp:
mlir_pipeline = MLIR_OPENMP_PIPELINE

Expand All @@ -129,7 +132,7 @@ def _jit_compile(self):
xdsl_pipeline = XDSL_MPI_PIPELINE(decomp)
elif is_gpu:
xdsl_pipeline = XDSL_GPU_PIPELINE
mlir_pipeline = MLIR_GPU_PIPELINE
mlir_pipeline = MLIR_GPU_PIPELINE(block_sizes)

# allow jit backdooring to provide your own xdsl code
backdoor = os.getenv('XDSL_JIT_BACKDOOR')
Expand Down

0 comments on commit d845804

Please sign in to comment.