diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py
index 2ffb0d1887e..4117161b50f 100644
--- a/devito/ir/ietxdsl/cluster_to_ssa.py
+++ b/devito/ir/ietxdsl/cluster_to_ssa.py
@@ -1,7 +1,7 @@
 # ------------- devito import -------------#
 
 from sympy import Add, Expr, Float, Indexed, Integer, Mod, Mul, Pow, Symbol
-from xdsl.dialects import arith, builtin, func, memref, scf, stencil
+from xdsl.dialects import arith, builtin, func, memref, scf, stencil, gpu
 from xdsl.dialects.experimental import dmp, math
 from xdsl.ir import Attribute, Block, Operation, OpResult, Region, SSAValue
 from typing import Any
@@ -479,8 +479,25 @@ def match_and_rewrite(self, op: iet_ssa.LoadSymbolic, rewriter: PatternRewriter,
         if symb_name not in args:
             body = parent.body.blocks[0]
             args[symb_name] = body.insert_arg(op.result.type, len(body.args))
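+            # GPU staging: for a stencil.FieldType arg, gpu.alloc a memref buffer, memcpy the arg into it, and use the buffer instead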
+
+            arg = args[symb_name]
+            shapetype = arg.type
+            if isinstance(shapetype, stencil.FieldType):
+                memref_type = memref.MemRefType.from_element_type_and_shape(shapetype.get_element_type(), shapetype.get_shape())
+                alloc = gpu.AllocOp(memref.MemRefType.from_element_type_and_shape(shapetype.get_element_type(), shapetype.get_shape()))
+                outcast = builtin.UnrealizedConversionCastOp.get(alloc, shapetype)
+                # arg.replace_by(outcast.results[0])
+                args[symb_name] = outcast.results[0]
+                incast = builtin.UnrealizedConversionCastOp.get(arg, memref_type)
+                copy = gpu.MemcpyOp(source=incast, destination=alloc)
+                body.insert_ops_before([alloc, outcast, incast, copy], body.ops.first)
+                print(arg)
+                print(arg.uses)
+                print(parent)
 
         op.result.replace_by(args[symb_name])
+
         rewriter.erase_matched_op()
         parent.update_function_type()
         # attach information on parameter names to func
diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py
index bbd9cbc4e18..8441e2cd368 100644
--- a/devito/operator/xdsl_operator.py
+++ b/devito/operator/xdsl_operator.py
@@ -53,9 +53,9 @@
 # gpu-launch-sink-index-computations seemed to have no impact
 MLIR_GPU_PIPELINE = '"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{parallel-loop-tile-sizes=128,1,1},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,fold-memref-alias-ops,expand-strided-metadata,lower-affine,gpu-kernel-outlining,canonicalize,cse,convert-arith-to-llvm{index-bitwidth=64},finalize-memref-to-llvm{index-bitwidth=64},convert-scf-to-cf,convert-cf-to-llvm{index-bitwidth=64},canonicalize,cse,gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"'
 
-XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir,printf-to-llvm"
-XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},printf-to-llvm"
-XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},convert-stencil-to-ll-mlir,dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"'
+XDSL_CPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir,reconcile-unrealized-casts,printf-to-llvm"
+XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm"
+XDSL_MPI_PIPELINE = lambda decomp: f'"dmp-decompose-2d{decomp},convert-stencil-to-ll-mlir,reconcile-unrealized-casts,dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"'
 
 
 class XDSLOperator(Operator):