diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py index 2ffb0d1887e..4117161b50f 100644 --- a/devito/ir/ietxdsl/cluster_to_ssa.py +++ b/devito/ir/ietxdsl/cluster_to_ssa.py @@ -1,7 +1,7 @@ # ------------- devito import -------------# from sympy import Add, Expr, Float, Indexed, Integer, Mod, Mul, Pow, Symbol -from xdsl.dialects import arith, builtin, func, memref, scf, stencil +from xdsl.dialects import arith, builtin, func, memref, scf, stencil, gpu from xdsl.dialects.experimental import dmp, math from xdsl.ir import Attribute, Block, Operation, OpResult, Region, SSAValue from typing import Any @@ -479,8 +479,25 @@ def match_and_rewrite(self, op: iet_ssa.LoadSymbolic, rewriter: PatternRewriter, if symb_name not in args: body = parent.body.blocks[0] args[symb_name] = body.insert_arg(op.result.type, len(body.args)) + # GPU STUFF + + arg = args[symb_name] + shapetype = arg.type + if isinstance(shapetype, stencil.FieldType): + memref_type = memref.MemRefType.from_element_type_and_shape(shapetype.get_element_type(), shapetype.get_shape()) + alloc = gpu.AllocOp(memref.MemRefType.from_element_type_and_shape(shapetype.get_element_type(), shapetype.get_shape())) + outcast = builtin.UnrealizedConversionCastOp.get(alloc, shapetype) + # arg.replace_by(outcast.results[0]) + args[symb_name] = outcast.results[0] + incast = builtin.UnrealizedConversionCastOp.get(arg, memref_type) + copy = gpu.MemcpyOp(source=incast, destination=alloc) + body.insert_ops_before([alloc, outcast, incast, copy], body.ops.first) + print(arg) + print(arg.uses) + print(parent) op.result.replace_by(args[symb_name]) + rewriter.erase_matched_op() parent.update_function_type() # attach information on parameter names to func diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index bbd9cbc4e18..8441e2cd368 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -53,9 +53,9 @@ # gpu-launch-sink-index-computations seemed to have no impact MLIR_GPU_PIPELINE = '"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{parallel-loop-tile-sizes=128,1,1},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,fold-memref-alias-ops,expand-strided-metadata,lower-affine,gpu-kernel-outlining,canonicalize,cse,convert-arith-to-llvm{index-bitwidth=64},finalize-memref-to-llvm{index-bitwidth=64},convert-scf-to-cf,convert-cf-to-llvm{index-bitwidth=64},canonicalize,cse,gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' -XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir,printf-to-llvm" -XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},printf-to-llvm" -XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},convert-stencil-to-ll-mlir,dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' +XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir,reconcile-unrealized-casts,printf-to-llvm" +XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm" +XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},convert-stencil-to-ll-mlir,reconcile-unrealized-casts,dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' class XDSLOperator(Operator):