From 6146b87880efca1f0ec85521052be37e1120cc5a Mon Sep 17 00:00:00 2001
From: George Bisbas <g.bisbas18@imperial.ac.uk>
Date: Fri, 24 Nov 2023 19:55:05 +0000
Subject: [PATCH 1/6] compiler: Simplify

---
 devito/ir/ietxdsl/cluster_to_ssa.py | 40 +++++++++++++++++------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py
index 9bf155d206..4624aec01c 100644
--- a/devito/ir/ietxdsl/cluster_to_ssa.py
+++ b/devito/ir/ietxdsl/cluster_to_ssa.py
@@ -11,7 +11,7 @@
 # ------------- devito imports -------------#
 from devito import Grid, SteppingDimension
 from devito.ir.equations import LoweredEq
-from devito.symbolics import retrieve_indexed
+from devito.symbolics import retrieve_indexed, retrieve_function_carriers
 from devito.logger import perf
 
 # ------------- devito-xdsl SSA imports -------------#
@@ -50,26 +50,26 @@ def _convert_eq(self, eq: LoweredEq):
         function = eq.lhs.function
         mlir_type = dtypes_to_xdsltypes[function.dtype]
         grid: Grid = function.grid
-        # get the halo of the space dimensions only e.g [(2, 2), (2, 2)] for the 2d case
+
+        # Get the halo of the grid.dimensions
+        # e.g [(2, 2), (2, 2)] for the 2D case
         # Do not forget the issue with Devito adding an extra point!
+        # Check 'def halo_setup' for more
         # (for derivative regions)
-        halo = [function.halo[function.dimensions.index(d)] for d in grid.dimensions]
-
+        halo = [function.halo[d] for d in grid.dimensions]
+        
         # Shift all time values so that for all accesses at t + n, n>=0.
         self.time_offs = min(
             int(idx.indices[0] - grid.stepping_dim) for idx in retrieve_indexed(eq)
         )
 
-        # Calculate the actual size of our time dimension
-        actual_time_size = (
-            max(int(idx.indices[0] - grid.stepping_dim) for idx in retrieve_indexed(eq))
-            - self.time_offs
-            + 1
-        )
-
+        
+        # Get the time_size       
+        time_size = max(d.function.time_size for d in retrieve_function_carriers(eq))
+        
         # Build the for loop
         perf("Build Time Loop")
-        loop = self._build_iet_for(grid.stepping_dim, actual_time_size)
+        loop = self._build_iet_for(grid.stepping_dim, time_size)
 
         # build stencil
         perf("Initialize a stencil Op")
@@ -77,7 +77,7 @@ def _convert_eq(self, eq: LoweredEq):
             loop.subindice_ssa_vals(),
             grid.shape_local,
             halo,
-            actual_time_size,
+            time_size,
             mlir_type,
             eq.lhs.function._C_name,
         )
@@ -87,7 +87,7 @@ def _convert_eq(self, eq: LoweredEq):
         # dims -> ssa vals
         perf("Apply time offsets")
         time_offset_to_field: dict[str, SSAValue] = {
-            i: stencil_op.block.args[i] for i in range(actual_time_size - 1)
+            i: stencil_op.block.args[i] for i in range(time_size - 1)
         }
 
         # reset loaded values
@@ -103,8 +103,10 @@ def _convert_eq(self, eq: LoweredEq):
 
         # emit return
         offsets = _get_dim_offsets(eq.lhs, self.time_offs)
+        # import pdb;pdb.set_trace()
+
         assert (
-            offsets[0] == actual_time_size - 1
+            offsets[0] == time_size - 1
         ), "result should be written to last time buffer"
         assert all(
             o == 0 for o in offsets[1:]
@@ -153,8 +155,10 @@ def _visit_math_nodes(self, node: Expr) -> SSAValue:
             # select the correct op from arith.addi, arith.addf, arith.muli, arith.mulf
             if isinstance(carry.type, builtin.IntegerType):
                 op_cls = arith.Addi if isinstance(node, Add) else arith.Muli
-            else:
+            elif isinstance(carry.type, builtin.Float32Type):
                 op_cls = arith.Addf if isinstance(node, Add) else arith.Mulf
+            else:
+                raise("Add support for another type")
 
             for arg in args:
                 op = op_cls(carry, arg)
@@ -202,10 +206,11 @@ def _add_access_ops(
             """
             # get the compile time constant offsets for this read
             offsets = _get_dim_offsets(read, self.time_offs)
+            
             if offsets in self.loaded_values:
                 continue
 
-            # assume time dimension is first dimension
+            # Assume time dimension is first dimension
             t_offset = offsets[0]
             space_offsets = offsets[1:]
 
@@ -280,6 +285,7 @@ def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple:
     # shift all time values so that for all accesses at t + n, n>=0.
     # time_offs = min(int(i - d) for i, d in zip(idx.indices, idx.function.dimensions))
     halo = ((t_offset, 0), *idx.function.halo[1:])
+
     try:
         return tuple(
             int(i - d - halo_offset)

From c4bef2d4e4ad15640836b6ae713d269cf333c91c Mon Sep 17 00:00:00 2001
From: George Bisbas <g.bisbas18@imperial.ac.uk>
Date: Tue, 28 Nov 2023 12:26:40 +0000
Subject: [PATCH 2/6] compiler: Drop more code

---
 devito/ir/ietxdsl/__init__.py       |  6 ++--
 devito/ir/ietxdsl/cluster_to_ssa.py |  9 ++----
 devito/ir/ietxdsl/lowering.py       | 43 -----------------------------
 devito/ir/ietxdsl/utils.py          | 10 +++++++
 4 files changed, 16 insertions(+), 52 deletions(-)
 create mode 100644 devito/ir/ietxdsl/utils.py

diff --git a/devito/ir/ietxdsl/__init__.py b/devito/ir/ietxdsl/__init__.py
index 2ba899e861..96bddfce7b 100644
--- a/devito/ir/ietxdsl/__init__.py
+++ b/devito/ir/ietxdsl/__init__.py
@@ -1,2 +1,4 @@
-from devito.ir.ietxdsl.lowering import LowerIetForToScfFor, LowerIetForToScfParallel, DropIetComments, iet_to_standard_mlir # noqa
-from devito.ir.ietxdsl.cluster_to_ssa import finalize_module_with_globals, convert_devito_stencil_to_xdsl_stencil  # noqa
+from devito.ir.ietxdsl.lowering import (LowerIetForToScfFor, LowerIetForToScfParallel,
+                                        iet_to_standard_mlir)  # noqa
+from devito.ir.ietxdsl.cluster_to_ssa import (finalize_module_with_globals,
+                                              convert_devito_stencil_to_xdsl_stencil)  # noqa
diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py
index 4624aec01c..4947b416ed 100644
--- a/devito/ir/ietxdsl/cluster_to_ssa.py
+++ b/devito/ir/ietxdsl/cluster_to_ssa.py
@@ -16,8 +16,10 @@
 
 # ------------- devito-xdsl SSA imports -------------#
 from devito.ir.ietxdsl import iet_ssa
+from devito.ir.ietxdsl.utils import is_int, is_float
 from devito.ir.ietxdsl.ietxdsl_functions import dtypes_to_xdsltypes
 
+
 # flake8: noqa
 
 class ExtractDevitoStencilConversion:
@@ -297,13 +299,6 @@ def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple:
         raise ValueError("Indices must be constant offset from dimension!") from ex
 
 
-def is_int(val: SSAValue):
-    return isinstance(val.type, builtin.IntegerType)
-
-
-def is_float(val: SSAValue):
-    return val.type in (builtin.f32, builtin.f64)
-
 
 # -------------------------------------------------------- ####
 #                                                          ####
diff --git a/devito/ir/ietxdsl/lowering.py b/devito/ir/ietxdsl/lowering.py
index 6505f1d234..e7e0e5bb81 100644
--- a/devito/ir/ietxdsl/lowering.py
+++ b/devito/ir/ietxdsl/lowering.py
@@ -8,36 +8,6 @@
                                    GreedyRewritePatternApplier, op_type_rewrite_pattern)
 
 
-def _generate_subindices(subindices: int, block: Block,
-                         rewriter: PatternRewriter):
-    # keep track of the what argument we should replace with what
-    arg_changes: list[tuple[SSAValue, SSAValue]] = []
-
-    # keep track of the ops we want to insert
-    modulo = arith.Constant.from_int_and_width(subindices, builtin.i64)
-    new_ops = [modulo]
-
-    # generate the new indices
-    for i in range(subindices):
-        offset = arith.Constant.from_int_and_width(i, builtin.i64)
-        index_off = arith.Addi(block.args[0], offset)
-        index = arith.RemSI(index_off, modulo)
-
-        new_ops += [
-            offset,
-            index_off,
-            index,
-        ]
-        # replace block.args[i+1] with (arg0 + i) % n
-        arg_changes.append((block.args[i + 1], index.result))
-
-    rewriter.insert_op_at_start(new_ops, block)
-
-    for old, new in arg_changes:
-        old.replace_by(new)
-        block.erase_arg(old)
-
-
 class ConvertScfForArgsToIndex(RewritePattern):
     @op_type_rewrite_pattern
     def match_and_rewrite(self, op: scf.For, rewriter: PatternRewriter, /):
@@ -210,19 +180,6 @@ def recurse_scf_parallel(
         return lbs, ubs, steps, body
 
 
-class DropIetComments(RewritePattern):
-    """
-    This drops all iet.comment operations
-
-    TODO: convert iet.comment ops that have timer info into their own nodes
-    """
-
-    @op_type_rewrite_pattern
-    def match_and_rewrite(self, op: iet_ssa.Statement,
-                          rewriter: PatternRewriter, /):
-        rewriter.erase_matched_op()
-
-
 @dataclass
 class LowerIetPointerCastAndDataObject(RewritePattern):
     dimensions: list[SSAValue] = field(default_factory=list)
diff --git a/devito/ir/ietxdsl/utils.py b/devito/ir/ietxdsl/utils.py
new file mode 100644
index 0000000000..6d413c8898
--- /dev/null
+++ b/devito/ir/ietxdsl/utils.py
@@ -0,0 +1,10 @@
+from xdsl.dialects import builtin
+from xdsl.ir import SSAValue
+
+
+def is_int(val: SSAValue):
+    return isinstance(val.type, builtin.IntegerType)
+
+
+def is_float(val: SSAValue):
+    return val.type in (builtin.f32, builtin.f64)

From b2b1aba112fddf0bef3e076e80b45aa8f7821c09 Mon Sep 17 00:00:00 2001
From: George Bisbas <g.bisbas18@imperial.ac.uk>
Date: Tue, 28 Nov 2023 16:40:05 +0000
Subject: [PATCH 3/6] cleanup: Drop useless rewrite and restructure

---
 devito/core/cpu.py                  |  29 +++++++
 devito/ir/ietxdsl/__init__.py       |   7 +-
 devito/ir/ietxdsl/cluster_to_ssa.py |  11 +--
 devito/ir/ietxdsl/lowering.py       |  12 +--
 devito/operator/xdsl_operator.py    | 123 ++++++++--------------------
 tests/test_xdsl_base.py             |  36 ++++++++
 6 files changed, 117 insertions(+), 101 deletions(-)

diff --git a/devito/core/cpu.py b/devito/core/cpu.py
index 7137c08b06..15a60768a7 100644
--- a/devito/core/cpu.py
+++ b/devito/core/cpu.py
@@ -311,3 +311,32 @@ class Cpu64FsgCOperator(Cpu64FsgOperator):
 
 class Cpu64FsgOmpOperator(Cpu64FsgOperator):
     _Target = OmpTarget
+
+
+# -----------XDSL
+# This is a collection of xDSL optimization pipelines
+# Ideally they should follow the same type of subclassing as the rest of
+# the Devito Operators
+
+
+MLIR_CPU_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion, cse, canonicalize, fold-memref-alias-ops, expand-strided-metadata, loop-invariant-code-motion, lower-affine, convert-scf-to-cf, convert-math-to-llvm, convert-func-to-llvm{use-bare-ptr-memref-call-conv}, finalize-memref-to-llvm, canonicalize, cse)"'  # noqa
+
+MLIR_OPENMP_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,finalize-memref-to-llvm,loop-invariant-code-motion,canonicalize,cse,convert-scf-to-openmp,finalize-memref-to-llvm,convert-scf-to-cf,convert-func-to-llvm{use-bare-ptr-memref-call-conv},convert-openmp-to-llvm,convert-math-to-llvm,reconcile-unrealized-casts,canonicalize,cse)"'   # noqa
+# gpu-launch-sink-index-computations seemed to have no impact
+MLIR_GPU_PIPELINE = lambda block_sizes: f'"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,lower-affine, canonicalize,cse, fold-memref-alias-ops, gpu-launch-sink-index-computations, gpu-kernel-outlining, canonicalize{{region-simplify}},cse,fold-memref-alias-ops,expand-strided-metadata,lower-affine,canonicalize,cse,func.func(gpu-async-region),canonicalize,cse,convert-arith-to-llvm{{index-bitwidth=64}},convert-scf-to-cf,convert-cf-to-llvm{{index-bitwidth=64}},canonicalize,cse,convert-func-to-llvm{{use-bare-ptr-memref-call-conv}},gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"'   # noqa
+
+XDSL_CPU_PIPELINE = lambda nb_tiled_dims: f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},printf-to-llvm"'  # noqa
+
+XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm"  # noqa
+
+XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims: f'"dmp-decompose{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"'   # noqa
+
+
+def generate_tiling_arg(nb_tiled_dims: int):
+    """
+    Generate the tile-sizes arg for the convert-stencil-to-ll-mlir pass.
+    Generates no argument if the nb_tiled_dims arg is 0
+    """
+    if nb_tiled_dims == 0:
+        return ''
+    return "tile-sizes=" + ",".join(["64"]*nb_tiled_dims)
diff --git a/devito/ir/ietxdsl/__init__.py b/devito/ir/ietxdsl/__init__.py
index 96bddfce7b..24065c3581 100644
--- a/devito/ir/ietxdsl/__init__.py
+++ b/devito/ir/ietxdsl/__init__.py
@@ -1,4 +1,5 @@
-from devito.ir.ietxdsl.lowering import (LowerIetForToScfFor, LowerIetForToScfParallel,
-                                        iet_to_standard_mlir)  # noqa
+from devito.ir.ietxdsl.lowering import (LowerIetForToScfFor, LowerIetForToScfParallel)
 from devito.ir.ietxdsl.cluster_to_ssa import (finalize_module_with_globals,
-                                              convert_devito_stencil_to_xdsl_stencil)  # noqa
+                                              convert_devito_stencil_to_xdsl_stencil)
+
+# flake8: noqa
diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py
index 4947b416ed..a452919a82 100644
--- a/devito/ir/ietxdsl/cluster_to_ssa.py
+++ b/devito/ir/ietxdsl/cluster_to_ssa.py
@@ -59,13 +59,13 @@ def _convert_eq(self, eq: LoweredEq):
         # Check 'def halo_setup' for more
         # (for derivative regions)
         halo = [function.halo[d] for d in grid.dimensions]
-        
+
         # Shift all time values so that for all accesses at t + n, n>=0.
         self.time_offs = min(
             int(idx.indices[0] - grid.stepping_dim) for idx in retrieve_indexed(eq)
         )
 
-        
+
         # Get the time_size       
         time_size = max(d.function.time_size for d in retrieve_function_carriers(eq))
         
@@ -105,7 +105,6 @@ def _convert_eq(self, eq: LoweredEq):
 
         # emit return
         offsets = _get_dim_offsets(eq.lhs, self.time_offs)
-        # import pdb;pdb.set_trace()
 
         assert (
             offsets[0] == time_size - 1
@@ -546,8 +545,10 @@ def finalize_module_with_globals(module: builtin.ModuleOp, known_symbols: dict[s
         _InsertSymbolicConstants(known_symbols),
         _LowerLoadSymbolidToFuncArgs(),
     ]
-    grpa = GreedyRewritePatternApplier(patterns)
-    PatternRewriteWalker(grpa).rewrite_module(module)
+    rewriter = GreedyRewritePatternApplier(patterns)
+    PatternRewriteWalker(rewriter).rewrite_module(module)
+
+    # GPU boilerplate
     if gpu_boilerplate:
         walker = PatternRewriteWalker(GreedyRewritePatternApplier([WrapFunctionWithTransfers('apply_kernel')]))
         walker.rewrite_module(module)
diff --git a/devito/ir/ietxdsl/lowering.py b/devito/ir/ietxdsl/lowering.py
index e7e0e5bb81..a74e7ae908 100644
--- a/devito/ir/ietxdsl/lowering.py
+++ b/devito/ir/ietxdsl/lowering.py
@@ -260,15 +260,16 @@ def match_and_rewrite(self, op: func.FuncOp, rewriter: PatternRewriter, /):
                     iet_ssa.Dataobj.get_llvm_struct_type(), )
             elif isinstance(arg_typ, iet_ssa.Profiler):
                 op.body.blocks[0].args[i].type = llvm.LLVMPointerType.opaque()
-
         recalc_func_type(op)
 
 
 def recalc_func_type(op: func.FuncOp):
-    op.attributes['function_type'] = builtin.FunctionType.from_lists(
-        [arg.type for arg in op.body.blocks[0].args],
-        op.function_type.outputs.data,
-    )
+    # Only if blocks exist
+    if op.body.blocks:
+        op.attributes['function_type'] = builtin.FunctionType.from_lists(
+            [arg.type for arg in op.body.blocks[0].args],
+            op.function_type.outputs.data,
+        )
 
 
 @dataclass
@@ -369,7 +370,6 @@ def iet_to_standard_mlir(module: builtin.ModuleOp):
             LowerIetForToScfFor(),
             ConvertScfForArgsToIndex(),
             ConvertScfParallelArgsToIndex(),
-            DropIetComments(),
             CleanupDanglingIetDatatypes(),
             ptr_lower := LowerIetPointerCastAndDataObject(),
             LowerMemrefLoadToLLvmPointer(ptr_lower),
diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py
index 0e4e059b8d..4c9e3e036b 100644
--- a/devito/operator/xdsl_operator.py
+++ b/devito/operator/xdsl_operator.py
@@ -4,7 +4,7 @@
 import tempfile
 
 from math import ceil
-from collections import OrderedDict, namedtuple
+from collections import OrderedDict
 from io import StringIO
 from operator import attrgetter
 
@@ -26,12 +26,13 @@
 from devito.logger import debug, info, perf, warning, is_log_enabled_for
 from devito.operator.operator import IRs
 from devito.operator.profiling import AdvancedProfilerVerbose, create_profile
+from devito.operator.registry import operator_selector
 from devito.parameters import configuration
 from devito.passes import (Graph, lower_index_derivatives, generate_implicit,
                            generate_macros, minimize_symbols, unevaluate)
 from devito.passes.iet import CTarget
 from devito.symbolics import estimate_cost
-from devito.tools import (DAG, OrderedSet, ReducerMap, as_tuple, flatten,
+from devito.tools import (DAG, ReducerMap, as_tuple, flatten,
                           filter_sorted, frozendict, is_integer, split, timed_pass,
                           contains_val)
 from devito.types import Evaluable, TimeFunction, Grid
@@ -40,7 +41,9 @@
 
 from xdsl.printer import Printer
 
-# flake8: noqa
+
+from devito.core.cpu import (MLIR_CPU_PIPELINE, XDSL_CPU_PIPELINE, XDSL_MPI_PIPELINE,
+                             MLIR_OPENMP_PIPELINE, XDSL_GPU_PIPELINE, MLIR_GPU_PIPELINE)
 
 __all__ = ['XDSLOperator']
 
@@ -64,27 +67,6 @@
 """
 
 
-def generate_tiling_arg(nb_tiled_dims: int):
-    """
-    Generate the tile-sizes arg for the convert-stencil-to-ll-mlir pass. Generating no argument if the diled_dims arg is 0
-    """
-    if nb_tiled_dims == 0:
-        return ''
-    return "tile-sizes=" + ",".join(["64"]*nb_tiled_dims)
-
-
-CFLAGS = "-O3 -march=native -mtune=native -lmlir_c_runner_utils"
-
-MLIR_CPU_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,convert-scf-to-cf,convert-math-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv},finalize-memref-to-llvm,canonicalize,cse)"'
-MLIR_OPENMP_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,finalize-memref-to-llvm,loop-invariant-code-motion,canonicalize,cse,convert-scf-to-openmp,finalize-memref-to-llvm,convert-scf-to-cf,convert-func-to-llvm{use-bare-ptr-memref-call-conv},convert-openmp-to-llvm,convert-math-to-llvm,reconcile-unrealized-casts,canonicalize,cse)"'
-# gpu-launch-sink-index-computations seemed to have no impact
-MLIR_GPU_PIPELINE = lambda block_sizes: f'"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,lower-affine, canonicalize,cse, fold-memref-alias-ops, gpu-launch-sink-index-computations, gpu-kernel-outlining, canonicalize{{region-simplify}},cse,fold-memref-alias-ops,expand-strided-metadata,lower-affine,canonicalize,cse,func.func(gpu-async-region),canonicalize,cse,convert-arith-to-llvm{{index-bitwidth=64}},convert-scf-to-cf,convert-cf-to-llvm{{index-bitwidth=64}},canonicalize,cse,convert-func-to-llvm{{use-bare-ptr-memref-call-conv}},gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"'
-
-XDSL_CPU_PIPELINE = lambda nb_tiled_dims: f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},printf-to-llvm"'
-XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm"
-XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims: f'"dmp-decompose{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"'
-
-
 class XDSLOperator(Operator):
 
     _Target = CTarget
@@ -116,6 +98,7 @@ def _make_interop_o(self):
 
     @property
     def mpi_shape(self) -> tuple:
+        # TODO: move it elsewhere
         dist = self.functions[0].grid.distributor
 
         # reverse topology for row->column major
@@ -128,7 +111,7 @@ def _jit_compile(self):
         once per Operator, reagardless of how many times this method
         is invoked.
         """
-       
+
         with self._profiler.timer_on('jit-compile'):
             is_mpi = MPI.Is_initialized()
             is_gpu = os.environ.get("DEVITO_PLATFORM", None) == 'nvidiaX'
@@ -149,21 +132,19 @@ def _jit_compile(self):
             Printer(stream=module_str).print(self._module)
             module_str = module_str.getvalue()
 
-            to_tile = len(list(filter(lambda s: str(s) in ["x", "y", "z"], self.dimensions)))-1
+            to_tile = len(list(filter(lambda d: d.is_Space, self.dimensions)))-1
 
             xdsl_pipeline = XDSL_CPU_PIPELINE(to_tile)
             mlir_pipeline = MLIR_CPU_PIPELINE
 
-            block_sizes: list[int] = [min(target, self._jit_kernel_constants.get(f"{dim}_size", 1)) for target, dim in zip([32, 4, 8], ["x", "y", "z"])]
-            block_sizes = ','.join(str(bs) for bs in block_sizes)
-
             if is_omp:
                 mlir_pipeline = MLIR_OPENMP_PIPELINE
 
             if is_mpi:
                 shape, mpi_rank = self.mpi_shape
                 # Run with restrict domain=false so we only introduce the swaps but don't
-                # reduce the domain of the computation (as devito has already done that for us)
+                # reduce the domain of the computation
+                # (as devito has already done that for us)
                 slices = ','.join(str(x) for x in shape)
 
                 decomp = "2d-grid" if len(shape) == 2 else "3d-grid"
@@ -172,6 +153,9 @@ def _jit_compile(self):
                 xdsl_pipeline = XDSL_MPI_PIPELINE(decomp, to_tile)
             elif is_gpu:
                 xdsl_pipeline = XDSL_GPU_PIPELINE
+                # Get GPU blocking shapes
+                block_sizes: list[int] = [min(target, self._jit_kernel_constants.get(f"{dim}_size", 1)) for target, dim in zip([32, 4, 8], ["x", "y", "z"])]  # noqa
+                block_sizes = ','.join(str(bs) for bs in block_sizes)
                 mlir_pipeline = MLIR_GPU_PIPELINE(block_sizes)
 
             # allow jit backdooring to provide your own xdsl code
@@ -190,8 +174,9 @@ def _jit_compile(self):
             source_file.close()
 
             # Compile IR using xdsl-opt | mlir-opt | mlir-translate | clang
+            cflags = "-O3 -march=native -mtune=native -lmlir_c_runner_utils"
+
             try:
-                cflags = CFLAGS
                 cc = "clang"
 
                 if is_mpi:
@@ -208,15 +193,12 @@ def _jit_compile(self):
                 xdsl_cmd = f'xdsl-opt {source_name} -p {xdsl_pipeline}'
                 mlir_cmd = f'mlir-opt -p {mlir_pipeline}'
                 mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir'
-                clang_cmd = f'{cc} {cflags} -shared -o {self._tf.name} {self._interop_tf.name} -xir -'
+                clang_cmd = f'{cc} {cflags} -shared -o {self._tf.name} {self._interop_tf.name} -xir -'  # noqa
 
-
-                comp_steps = [
-                              xdsl_cmd,
+                comp_steps = [xdsl_cmd,
                               mlir_cmd,
                               mlir_translate_cmd,
-                              clang_cmd 
-                             ]
+                              clang_cmd]
 
                 # Execute each command and store the outputs
                 outputs = []
@@ -231,7 +213,7 @@ def _jit_compile(self):
                         'stdout': stdout,
                         'stderr': stderr
                     })
-                
+
             except Exception as ex:
                 print("error")
                 raise ex
@@ -241,9 +223,10 @@ def _jit_compile(self):
         perf("XDSLOperator `%s` jit-compiled `%s` in %.2f s with `mlir-opt`" %
              (self.name, source_name, elapsed))
 
-
     def _cmd_compile(self, cmd, input=None):
-        stdin = subprocess.PIPE if input is not None else None
+
+        # Could be dropped, unless input must be piped via stdin in the future
+        stdin = subprocess.PIPE if input is not None else None  # noqa
 
         res = subprocess.run(
             cmd,
@@ -271,11 +254,12 @@ def setup_memref_args(self):
         """
         args = dict()
         for arg in self.functions:
+            # For every TimeFunction add memref
             if isinstance(arg, TimeFunction):
-                data = arg._data_allocated
-                # iterate over the first dimension (time)
+                data = arg._data
                 for t in range(data.shape[0]):
                     args[f'{arg._C_name}_{t}'] = data[t, ...].ctypes.data_as(ptr_of(f32))
+
         self._jit_kernel_constants.update(args)
 
     @classmethod
@@ -352,7 +336,6 @@ def _lower(cls, expressions, **kwargs):
         """
         # Create a symbol registry
         kwargs['sregistry'] = SymbolRegistry()
-
         expressions = as_tuple(expressions)
 
         # Input check
@@ -800,10 +783,10 @@ def arguments(self, **kwargs):
 
     # Code generation and JIT compilation
 
-    #@cached_property
-    #def _soname(self):
-    #    """A unique name for the shared object resulting from JIT compilation."""
-    #    return Signer._digest(self, configuration)
+    # @cached_property
+    # def _soname(self):
+    #     """A unique name for the shared object resulting from JIT compilation."""
+    #     return Signer._digest(self, configuration)
 
     @cached_property
     def ccode(self):
@@ -825,7 +808,9 @@ def cfunction(self):
         if self._cfunction is None:
             self._cfunction = getattr(self._lib, "apply_kernel")
             # Associate a C type to each argument for runtime type check
-            self._cfunction.argtypes = self._construct_cfunction_args(self._jit_kernel_constants, get_types=True)
+            argtypes = self._construct_cfunction_args(self._jit_kernel_constants,
+                                                      get_types=True)
+            self._cfunction.argtypes = argtypes
 
         return self._cfunction
 
@@ -957,7 +942,7 @@ def apply(self, **kwargs):
         # Output summary of performance achieved
         return self._emit_apply_profiling(args)
 
-    def _construct_cfunction_args(self, args, get_types = False):
+    def _construct_cfunction_args(self, args, get_types=False):
         """
         Either construct the args for the cfunction, or construct the
         arg types for it.
@@ -965,7 +950,7 @@ def _construct_cfunction_args(self, args, get_types = False):
         ps = {
             p._C_name: p._C_ctype for p in self.parameters
         }
-        
+
         things = []
         things_types = []
 
@@ -982,37 +967,6 @@ def _construct_cfunction_args(self, args, get_types = False):
         else:
             return things
 
-    def _emit_build_profiling(self):
-        if not is_log_enabled_for('PERF'):
-            return
-
-        # Rounder to K decimal places
-        fround = lambda i, n=100: ceil(i * n) / n
-
-        timings = self._profiler.py_timers.copy()
-
-        tot = timings.pop('op-compile')
-        perf("Operator `%s` generated in %.2f s" % (self.name, fround(tot)))
-
-        max_hotspots = 3
-        threshold = 20.
-
-        def _emit_timings(timings, indent=''):
-            timings.pop('total', None)
-            entries = sorted(timings, key=lambda i: timings[i]['total'], reverse=True)
-            for i in entries[:max_hotspots]:
-                v = fround(timings[i]['total'])
-                perc = fround(v/tot*100, n=10)
-                if perc > threshold:
-                    perf("%s%s: %.2f s (%.1f %%)" % (indent, i.lstrip('_'), v, perc))
-                    _emit_timings(timings[i], ' '*len(indent) + ' * ')
-
-        _emit_timings(timings, '  * ')
-
-        if self._profiler._ops:
-            ops = ['%d --> %d' % i for i in self._profiler._ops]
-            perf("Flops reduction after symbolic optimization: [%s]" % ' ; '.join(ops))
-
     def _emit_apply_profiling(self, args):
         """Produce a performance summary of the profiled sections."""
         # Rounder to 2 decimal places
@@ -1188,10 +1142,6 @@ def rcompile(expressions, kwargs=None):
 
 # Misc helpers
 
-
-IRs = namedtuple('IRs', 'expressions clusters stree uiet iet')
-
-
 class ArgumentsMap(dict):
 
     def __init__(self, args, grid, op):
@@ -1345,6 +1295,5 @@ def parse_kwargs(**kwargs):
 
 def get_arg_names_from_module(op):
     return [
-        str_attr.data 
-        for str_attr in op.body.block.ops.first.attributes['param_names'].data
+        str_attr.data for str_attr in op.body.block.ops.first.attributes['param_names'].data  # noqa
     ]
diff --git a/tests/test_xdsl_base.py b/tests/test_xdsl_base.py
index 6953720a72..090bcbf9d9 100644
--- a/tests/test_xdsl_base.py
+++ b/tests/test_xdsl_base.py
@@ -213,3 +213,39 @@ def test_acoustic_3D(shape, so, to, nt):
     xdsl_norm = norm(u)
 
     assert np.isclose(devito_norm, xdsl_norm, rtol=1e-04).all()
+
+
+@pytest.mark.parametrize('shape', [(21, 21, 21)])
+@pytest.mark.parametrize('so', [2, 4])
+@pytest.mark.parametrize('to', [2])
+@pytest.mark.parametrize('nt', [20])
+def test_standard_mlir_rewrites(shape, so, to, nt):
+
+    grid = Grid(shape=shape)
+    dt = 0.0001
+
+    # Define the wavefield with the size of the model and the time dimension
+    u = TimeFunction(name="u", grid=grid, time_order=to, space_order=so)
+
+    pde = u.dt2 - u.laplace
+    eq0 = solve(pde, u.forward)
+
+    stencil = Eq(u.forward, eq0)
+    u.data[:, :, :] = 0
+    u.data[:, 40:50, 40:50] = 1
+
+    # Devito Operator
+    op = Operator([stencil])
+    op.apply(time=nt, dt=dt)
+
+    u.data[:, :, :] = 0
+    u.data[:, 40:50, 40:50] = 1
+
+    # XDSL Operator
+    xdslop = XDSLOperator([stencil])
+    xdslop.apply(time=nt, dt=dt)
+
+    from devito.ir.ietxdsl.lowering import iet_to_standard_mlir
+
+    # Exercise the otherwise unused iet_to_standard_mlir for coverage
+    iet_to_standard_mlir(xdslop._module)

From bcab46278cc4cad36f53f3e7db294655f0d18e30 Mon Sep 17 00:00:00 2001
From: George Bisbas <g.bisbas18@imperial.ac.uk>
Date: Tue, 28 Nov 2023 17:21:39 +0000
Subject: [PATCH 4/6] cleanup: drop more obsolete code

---
 devito/ir/ietxdsl/cluster_to_ssa.py | 82 ++++++++++++-----------------
 devito/xdslpasses/__init__.py       |  1 -
 devito/xdslpasses/iet/__init__.py   |  1 -
 devito/xdslpasses/iet/parpragma.py  | 57 --------------------
 4 files changed, 35 insertions(+), 106 deletions(-)
 delete mode 100644 devito/xdslpasses/__init__.py
 delete mode 100644 devito/xdslpasses/iet/__init__.py
 delete mode 100644 devito/xdslpasses/iet/parpragma.py

diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py
index a452919a82..d6474dfa60 100644
--- a/devito/ir/ietxdsl/cluster_to_ssa.py
+++ b/devito/ir/ietxdsl/cluster_to_ssa.py
@@ -1,12 +1,21 @@
 # ------------- General imports -------------#
 
 from typing import Any
+from dataclasses import dataclass, field
 from sympy import Add, Expr, Float, Indexed, Integer, Mod, Mul, Pow, Symbol
 
 # ------------- xdsl imports -------------#
-from xdsl.dialects import arith, builtin, func, memref, scf, stencil, gpu
+from xdsl.dialects import (arith, builtin, func, memref, scf,
+                           stencil, gpu, llvm)
 from xdsl.dialects.experimental import math
 from xdsl.ir import Block, Operation, OpResult, Region, SSAValue
+from xdsl.pattern_rewriter import (
+    GreedyRewritePatternApplier,
+    PatternRewriter,
+    PatternRewriteWalker,
+    RewritePattern,
+    op_type_rewrite_pattern,
+)
 
 # ------------- devito imports -------------#
 from devito import Grid, SteppingDimension
@@ -18,7 +27,7 @@
 from devito.ir.ietxdsl import iet_ssa
 from devito.ir.ietxdsl.utils import is_int, is_float
 from devito.ir.ietxdsl.ietxdsl_functions import dtypes_to_xdsltypes
-
+from devito.ir.ietxdsl.lowering import LowerIetForToScfFor
 
 # flake8: noqa
 
@@ -121,34 +130,28 @@ def _convert_eq(self, eq: LoweredEq):
         )
 
     def _visit_math_nodes(self, node: Expr) -> SSAValue:
+        # Handle Indexeds
         if isinstance(node, Indexed):
             offsets = _get_dim_offsets(node, self.time_offs)
             return self.loaded_values[offsets]
-        if isinstance(node, Integer):
+        # Handle Integers
+        elif isinstance(node, Integer):
             cst = arith.Constant.from_int_and_width(int(node), builtin.i64)
             self.block.add_op(cst)
             return cst.result
-        if isinstance(node, Float):
+        # Handle Floats
+        elif isinstance(node, Float):
             cst = arith.Constant.from_float_and_width(float(node), builtin.f32)
             self.block.add_op(cst)
             return cst.result
-        # if isinstance(math, Constant):
-        #    symb = iet_ssa.LoadSymbolic.get(math.name, dtypes_to_xdsltypes[math.dtype])
-        #    self.block.add_op(symb)
-        #    return symb.result
-        if isinstance(node, Symbol):
+        # Handle Symbols
+        elif isinstance(node, Symbol):
             symb = iet_ssa.LoadSymbolic.get(node.name, builtin.f32)
             self.block.add_op(symb)
-            return symb.result
-
-        # handle all of the math
-        if not isinstance(node, (Add, Mul, Pow, Mod)):
-            raise ValueError(f"Unknown math: {node}", node)
-
-        args = [self._visit_math_nodes(arg) for arg in node.args]
-
-        # make sure all args are the same type:
-        if isinstance(node, (Add, Mul)):
+            return symb.result     
+        # Handle Add Mul
+        elif isinstance(node, (Add, Mul)):
+            args = [self._visit_math_nodes(arg) for arg in node.args]
             # add casts when necessary
             # get first element out, store the rest in args
             # this makes the reduction easier
@@ -160,14 +163,14 @@ def _visit_math_nodes(self, node: Expr) -> SSAValue:
                 op_cls = arith.Addf if isinstance(node, Add) else arith.Mulf
             else:
                 raise("Add support for another type")
-
             for arg in args:
                 op = op_cls(carry, arg)
                 self.block.add_op(op)
                 carry = op.result
             return carry
-
-        if isinstance(node, Pow):
+        # Handle Pow
+        elif isinstance(node, Pow):
+            args = [self._visit_math_nodes(arg) for arg in node.args]
             assert len(args) == 2, "can't pow with != 2 args!"
             base, ex = args
             if is_int(base):
@@ -188,11 +191,12 @@ def _visit_math_nodes(self, node: Expr) -> SSAValue:
             op = op_cls.get(base, ex)
             self.block.add_op(op)
             return op.result
+        # Handle Mod
+        elif isinstance(node, Mod):
+            raise NotImplementedError("Go away, no mod here. >:(")
+        else:
+            raise NotImplementedError(f"Unknown math: {node}", node)
 
-        if isinstance(node, Mod):
-            raise ValueError("Go away, no mod here. >:(")
-
-        raise ValueError("Unknown math!")
 
     def _add_access_ops(
         self, reads: list[Indexed], time_offset_to_field: dict[int, SSAValue]
@@ -257,10 +261,10 @@ def _ensure_same_type(self, *vals: SSAValue):
         if all(is_float(val) for val in vals):
             return vals
         # not everything homogeneous
-        new_vals = []
+        processed = []
         for val in vals:
             if is_float(val):
-                new_vals.append(val)
+                processed.append(val)
                 continue
             # if the val is the result of a arith.constant with no uses,
             # we change the type of the arith.constant to our desired type
@@ -273,13 +277,13 @@ def _ensure_same_type(self, *vals: SSAValue):
                 val.op.attributes["value"] = builtin.FloatAttr(
                     float(val.op.value.value.data), builtin.f32
                 )
-                new_vals.append(val)
+                processed.append(val)
                 continue
             # insert an integer to float cast op
             conv = arith.SIToFPOp(val, builtin.f32)
             self.block.add_op(conv)
-            new_vals.append(conv.result)
-        return new_vals
+            processed.append(conv.result)
+        return processed
 
 
 def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple:
@@ -305,22 +309,6 @@ def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple:
 #                                                          ####
 # -------------------------------------------------------- ####
 
-from dataclasses import dataclass, field
-
-from xdsl.pattern_rewriter import (
-    GreedyRewritePatternApplier,
-    PatternRewriter,
-    PatternRewriteWalker,
-    RewritePattern,
-    op_type_rewrite_pattern,
-)
-
-from devito.ir.ietxdsl.lowering import (
-    LowerIetForToScfFor,
-)
-
-from xdsl.dialects import llvm
-
 @dataclass
 class WrapFunctionWithTransfers(RewritePattern):
     func_name: str
diff --git a/devito/xdslpasses/__init__.py b/devito/xdslpasses/__init__.py
deleted file mode 100644
index 4eb86f53f3..0000000000
--- a/devito/xdslpasses/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .iet import Callable  # noqa
\ No newline at end of file
diff --git a/devito/xdslpasses/iet/__init__.py b/devito/xdslpasses/iet/__init__.py
deleted file mode 100644
index feeeddfa40..0000000000
--- a/devito/xdslpasses/iet/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .parpragma import Callable  # noqa
diff --git a/devito/xdslpasses/iet/parpragma.py b/devito/xdslpasses/iet/parpragma.py
deleted file mode 100644
index 97f0a519b3..0000000000
--- a/devito/xdslpasses/iet/parpragma.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from devito.ir.ietxdsl import *
-from xdsl.pattern_rewriter import RewritePattern, GreedyRewritePatternApplier, \
-    op_type_rewrite_pattern, PatternRewriteWalker, PatternRewriter
-import xdsl.dialects.builtin as builtin
-
-
-# NOTE: this is WIP and needs refactoring ;)
-@dataclass
-class MakeSimdPattern(RewritePattern):
-    """
-    This pattern reproduces the behaviour of PragmaSimdTransformer
-    """
-
-    def is_parallel_relaxed(self, iteration: Iteration) -> bool:
-        return any([
-            prop.data
-            in ["parallel", "parallel_if_private", "parallel_if_private"]
-            for prop in iteration.properties.data
-        ])
-
-    @op_type_rewrite_pattern
-    def match_and_rewrite(self, iteration: Iteration,
-                          rewriter: PatternRewriter):
-
-        if (not self.is_parallel_relaxed(iteration)):
-            return
-
-        # check if parent is parallel as well
-        parent_op = iteration.parent.parent.parent
-        if (not self.is_parallel_relaxed(parent_op)):
-            return
-
-        # TODO how to only check for iteration trees?
-        # NOTE: currently only checking the first child
-        child_ops = iteration.body.blocks[0].ops
-
-        # check if children is parallel as well
-        if (isinstance(child_ops[0], Iteration)
-                and self.is_parallel_relaxed(child_ops[0])):
-            return
-
-        # TODO: insert additional checks
-        # toreview
-        # iteration.pragmas.data.append(StringAttr("simd-for"))
-
-
-def construct_walker() -> PatternRewriteWalker:
-    applier = GreedyRewritePatternApplier([MakeSimdPattern()])
-
-    return PatternRewriteWalker(applier,
-                                walk_regions_first=False,
-                                apply_recursively=False)
-
-
-def make_simd(ctx, op: builtin.ModuleOp):
-    walker = construct_walker()
-    walker.rewrite_module(op)

From 40c56d1ecd296546c55435c139d917b725d376d9 Mon Sep 17 00:00:00 2001
From: George Bisbas <g.bisbas18@imperial.ac.uk>
Date: Tue, 28 Nov 2023 17:54:16 +0000
Subject: [PATCH 5/6] xdsl_operator: More cleanup

---
 devito/operator/xdsl_operator.py | 893 +------------------------------
 1 file changed, 1 insertion(+), 892 deletions(-)

diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py
index 4c9e3e036b..7eddd7d30f 100644
--- a/devito/operator/xdsl_operator.py
+++ b/devito/operator/xdsl_operator.py
@@ -262,14 +262,6 @@ def setup_memref_args(self):
 
         self._jit_kernel_constants.update(args)
 
-    @classmethod
-    def _normalize_kwargs(cls, **kwargs):
-        return kwargs
-
-    @classmethod
-    def _check_kwargs(cls, **kwargs):
-        return
-
     @classmethod
     def _build(cls, expressions, **kwargs) -> Callable:
         debug("-Building operator")
@@ -295,6 +287,7 @@ def _build(cls, expressions, **kwargs) -> Callable:
 
         # Required for the jit-compilation
         op._compiler = kwargs['compiler']
+        op._language = kwargs['language']
         op._lib = None
         op._cfunction = None
 
@@ -323,10 +316,6 @@ def _build(cls, expressions, **kwargs) -> Callable:
 
         return op
 
-    def __init__(self, *args, **kwargs):
-        # Bypass the silent call to __init__ triggered through the backends engine
-        pass
-
     # Compilation -- Expression level
 
     @classmethod
@@ -368,433 +357,6 @@ def _lower(cls, expressions, **kwargs):
 
         return IRs(expressions, clusters, stree, uiet, iet), byproduct, module
 
-    @classmethod
-    def _rcompile_wrapper(cls, **kwargs):
-        def wrapper(expressions, kwargs=kwargs):
-            return rcompile(expressions, kwargs)
-        return wrapper
-
-    @classmethod
-    def _initialize_state(cls, **kwargs):
-        return {}
-
-    @classmethod
-    def _specialize_dsl(cls, expressions, **kwargs):
-        """
-        Backend hook for specialization at the DSL level. The input is made of
-        expressions and other higher order objects such as Injection or
-        Interpolation; the expressions are still unevaluated at this stage,
-        meaning that they are still in tensorial form and derivatives aren't
-        expanded yet.
-        """
-        return expressions
-
-    @classmethod
-    def _specialize_exprs(cls, expressions, **kwargs):
-        """
-        Backend hook for specialization at the expression level.
-        """
-        return expressions
-
-    @classmethod
-    @timed_pass(name='lowering.Expressions')
-    def _lower_exprs(cls, expressions, **kwargs):
-        """
-        Expression lowering:
-
-            * Apply rewrite rules;
-            * Evaluate derivatives;
-            * Flatten vectorial equations;
-            * Indexify Functions;
-            * Apply substitution rules;
-            * Shift indices for domain alignment.
-        """
-        expand = kwargs['options'].get('expand', True)
-
-        # Specialization is performed on unevaluated expressions
-        expressions = cls._specialize_dsl(expressions, **kwargs)
-
-        # Lower FD derivatives
-        # NOTE: we force expansion of derivatives along SteppingDimensions
-        # because it drastically simplifies the subsequent lowering into
-        # ModuloDimensions
-        if not expand:
-            expand = lambda d: d.is_Stepping
-        expressions = flatten([i._evaluate(expand=expand) for i in expressions])
-
-        # Scalarize the tensor equations, if any
-        expressions = [j for i in expressions for j in i._flatten]
-
-        # A second round of specialization is performed on evaluated expressions
-        expressions = cls._specialize_exprs(expressions, **kwargs)
-
-        # "True" lowering (indexification, shifting, ...)
-        expressions = lower_exprs(expressions, **kwargs)
-
-        processed = [LoweredEq(i) for i in expressions]
-
-        return processed
-
-    # Compilation -- Cluster level
-
-    @classmethod
-    def _specialize_clusters(cls, clusters, **kwargs):
-        """
-        Backend hook for specialization at the Cluster level.
-        """
-        return clusters
-
-    @classmethod
-    @timed_pass(name='lowering.Clusters')
-    def _lower_clusters(cls, expressions, profiler=None, **kwargs):
-        """
-        Clusters lowering:
-
-            * Group expressions into Clusters;
-            * Introduce guards for conditional Clusters;
-            * Analyze Clusters to detect computational properties such
-              as parallelism.
-            * Optimize Clusters for performance
-        """
-        sregistry = kwargs['sregistry']
-
-        # Build a sequence of Clusters from a sequence of Eqs
-        clusters = clusterize(expressions, **kwargs)
-
-        # Operation count before specialization
-        init_ops = sum(estimate_cost(c.exprs) for c in clusters if c.is_dense)
-
-        clusters = cls._specialize_clusters(clusters, **kwargs)
-
-        # Operation count after specialization
-        final_ops = sum(estimate_cost(c.exprs) for c in clusters if c.is_dense)
-        try:
-            profiler.record_ops_variation(init_ops, final_ops)
-        except AttributeError:
-            pass
-
-        # Generate implicit Clusters from higher level abstractions
-        clusters = generate_implicit(clusters, sregistry=sregistry)
-
-        # Lower all remaining high order symbolic objects
-        clusters = lower_index_derivatives(clusters, **kwargs)
-
-        # Make sure no reconstructions can unpick any of the symbolic
-        # optimizations performed so far
-        clusters = unevaluate(clusters)
-
-        return ClusterGroup(clusters)
-
-    # Compilation -- ScheduleTree level
-
-    @classmethod
-    def _specialize_stree(cls, stree, **kwargs):
-        """
-        DEPRECATED: Backend hook for specialization at the Schedule tree level.
-        """
-        return stree
-
-    @classmethod
-    @timed_pass(name='lowering.ScheduleTree')
-    def _lower_stree(cls, clusters, **kwargs):
-        """
-        Schedule tree lowering:
-
-            * Turn a sequence of Clusters into a ScheduleTree;
-            * Derive and attach metadata for distributed-memory parallelism;
-            * Derive sections for performance profiling
-        """
-        # DEPRECATED: Build a ScheduleTree from a sequence of Clusters
-        stree = stree_build(clusters, **kwargs)
-        stree = cls._specialize_stree(stree)
-
-        return stree
-
-    # Compilation -- Iteration/Expression tree level
-
-    @classmethod
-    def _specialize_iet(cls, graph, **kwargs):
-        """
-        Backend hook for specialization at the Iteration/Expression tree level.
-        """
-        return graph
-
-    @classmethod
-    @timed_pass(name='lowering.uIET')
-    def _lower_uiet(cls, stree, profiler=None, **kwargs):
-        """
-        Turn a ScheduleTree into an unbounded Iteration/Expression tree, that is
-        in essence a "floating" IET where one or more variables may be unbounded
-        (i.e., no definition placed yet).
-        """
-        # Build an unbounded IET from a ScheduleTree
-        uiet = iet_build(stree)
-
-        # Analyze the IET Sections for C-level profiling
-        try:
-            profiler.analyze(uiet)
-        except AttributeError:
-            pass
-
-        return uiet
-
-    @classmethod
-    @timed_pass(name='lowering.IET')
-    def _lower_iet(cls, uiet, profiler=None, **kwargs):
-        """
-        Iteration/Expression tree lowering:
-
-            * Introduce distributed-memory, shared-memory, and SIMD parallelism;
-            * Introduce optimizations for data locality;
-            * Finalize (e.g., symbol definitions, array casts)
-        """
-        name = kwargs.get("name", "Kernel")
-        sregistry = kwargs['sregistry']
-
-        # Wrap the IET with an EntryFunction (a special Callable representing
-        # the entry point of the generated library)
-        parameters = derive_parameters(uiet, True)
-        iet = EntryFunction(name, uiet, 'int', parameters, ())
-
-        # Lower IET to a target-specific IET
-        graph = Graph(iet, sregistry=sregistry)
-        graph = cls._specialize_iet(graph, **kwargs)
-
-        # Instrument the IET for C-level profiling
-        # Note: this is postponed until after _specialize_iet because during
-        # specialization further Sections may be introduced
-        cls._Target.instrument(graph, profiler=profiler, **kwargs)
-
-        # Extract the necessary macros from the symbolic objects
-        generate_macros(graph)
-
-        # Target-independent optimizations
-        minimize_symbols(graph)
-
-        return graph.root, graph
-
-    # Read-only properties exposed to the outside world
-
-    @cached_property
-    def reads(self):
-        return tuple(self._reads)
-
-    @cached_property
-    def writes(self):
-        return tuple(self._writes)
-
-    @cached_property
-    def dimensions(self):
-        ret = set().union(*[d._defines for d in self._dimensions])
-
-        # During compilation other Dimensions may have been produced
-        dimensions = FindSymbols('dimensions').visit(self)
-        ret.update(d for d in dimensions if d.is_PerfKnob)
-
-        ret = tuple(sorted(ret, key=attrgetter('name')))
-
-        return ret
-
-    @cached_property
-    def input(self):
-        return tuple(i for i in self.parameters if i.is_Input)
-
-    @cached_property
-    def temporaries(self):
-        return tuple(i for i in self.parameters if i.is_TempFunction)
-
-    @cached_property
-    def objects(self):
-        return tuple(i for i in self.parameters if i.is_Object)
-
-    # Arguments processing
-
-    @cached_property
-    def _access_modes(self):
-        """
-        A table providing the AccessMode of all user-accessible symbols in `self`.
-        """
-        return frozendict({i: AccessMode(i in self.reads, i in self.writes)
-                           for i in self.input})
-
-    def _prepare_arguments(self, autotune=None, **kwargs):
-        """
-        Process runtime arguments passed to ``.apply()` and derive
-        default values for any remaining arguments.
-        """
-        # Sanity check -- all user-provided keywords must be known to the Operator
-        if not configuration['ignore-unknowns']:
-            for k, v in kwargs.items():
-                if k not in self._known_arguments:
-                    raise ValueError("Unrecognized argument %s=%s" % (k, v))
-
-        # Pre-process Dimension overrides. This may help ruling out ambiguities
-        # when processing the `defaults` arguments. A topological sorting is used
-        # as DerivedDimensions may depend on their parents
-        nodes = self.dimensions
-        edges = [(i, i.parent) for i in self.dimensions
-                 if i.is_Derived and i.parent in set(nodes)]
-        toposort = DAG(nodes, edges).topological_sort()
-
-        futures = {}
-        for d in reversed(toposort):
-            if set(d._arg_names).intersection(kwargs):
-                futures.update(d._arg_values(self._dspace[d], args={}, **kwargs))
-
-        overrides, defaults = split(self.input, lambda p: p.name in kwargs)
-
-        # Process data-carrier overrides
-        args = kwargs['args'] = ReducerMap()
-        for p in overrides:
-            args.update(p._arg_values(**kwargs))
-            try:
-                args.reduce_inplace()
-            except ValueError:
-                raise ValueError("Override `%s` is incompatible with overrides `%s`" %
-                                 (p, [i for i in overrides if i.name in args]))
-
-        # Process data-carrier defaults
-        for p in defaults:
-            if p.name in args:
-                # E.g., SubFunctions
-                continue
-            for k, v in p._arg_values(**kwargs).items():
-                if k not in args:
-                    args[k] = v
-                elif k in futures:
-                    # An explicit override is later going to set `args[k]`
-                    pass
-                elif k in kwargs:
-                    # User is in control
-                    # E.g., given a ConditionalDimension `t_sub` with factor `fact` and
-                    # a TimeFunction `usave(t_sub, x, y)`, an override for `fact` is
-                    # supplied w/o overriding `usave`; that's legal
-                    pass
-                elif is_integer(args[k]) and not contains_val(args[k], v):
-                    raise ValueError("Default `%s` is incompatible with other args as "
-                                     "`%s=%s`, while `%s=%s` is expected. Perhaps you "
-                                     "forgot to override `%s`?" %
-                                     (p, k, v, k, args[k], p))
-
-        args = kwargs['args'] = args.reduce_all()
-
-        # DiscreteFunctions may be created from CartesianDiscretizations, which in
-        # turn could be Grids or SubDomains. Both may provide arguments
-        discretizations = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
-        discretizations.update({getattr(p, 'grid', None) for p in defaults})
-        discretizations.discard(None)
-        # Remove subgrids if multiple grids
-        if len(discretizations) > 1:
-            discretizations = {g for g in discretizations
-                               if not any(d.is_Derived for d in g.dimensions)}
-
-        for i in discretizations:
-            args.update(i._arg_values(**kwargs))
-
-        # There can only be one Grid from which DiscreteFunctions were created
-        grids = {i for i in discretizations if isinstance(i, Grid)}
-        if len(grids) > 1:
-            # We loosely tolerate multiple Grids for backwards compatibility
-            # with spacial subsampling, which should be revisited however. And
-            # With MPI it would definitely break!
-            if configuration['mpi']:
-                raise ValueError("Multiple Grids found")
-        try:
-            grid = grids.pop()
-        except KeyError:
-            grid = None
-
-        # An ArgumentsMap carries additional metadata that may be used by
-        # the subsequent phases of the arguments processing
-        args = kwargs['args'] = ArgumentsMap(args, grid, self)
-
-        # Process Dimensions
-        for d in reversed(toposort):
-            args.update(d._arg_values(self._dspace[d], grid, **kwargs))
-
-        # Process Objects
-        for o in self.objects:
-            args.update(o._arg_values(grid=grid, **kwargs))
-
-        # In some "lower-level" Operators implementing a random piece of C, such as
-        # one or more calls to third-party library functions, there could still be
-        # at this point unprocessed arguments (e.g., scalars)
-        kwargs.pop('args')
-        args.update({k: v for k, v in kwargs.items() if k not in args})
-
-        # Sanity check
-        for p in self.parameters:
-            p._arg_check(args, self._dspace[p], am=self._access_modes.get(p))
-        for d in self.dimensions:
-            if d.is_Derived:
-                d._arg_check(args, self._dspace[p])
-
-        # Turn arguments into a format suitable for the generated code
-        # E.g., instead of NumPy arrays for Functions, the generated code expects
-        # pointers to ctypes.Struct
-        for p in self.parameters:
-            try:
-                args.update(kwargs.get(p.name, p)._arg_finalize(args, alias=p))
-            except AttributeError:
-                # User-provided floats/ndarray obviously do not have `_arg_finalize`
-                args.update(p._arg_finalize(args, alias=p))
-
-        # Execute autotuning and adjust arguments accordingly
-        args.update(self._autotune(args, autotune or configuration['autotuning']))
-
-        return args
-
-    def _postprocess_arguments(self, args, **kwargs):
-        """Process runtime arguments upon returning from ``.apply()``."""
-        for p in self.parameters:
-            try:
-                subfuncs = (args[getattr(p, s).name] for s in p._sub_functions)
-                p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name))
-            except AttributeError:
-                p._arg_apply(args[p.name], alias=kwargs.get(p.name))
-
-    @cached_property
-    def _known_arguments(self):
-        """The arguments that can be passed to ``apply`` when running the Operator."""
-        ret = set()
-        for i in self.input:
-            ret.update(i._arg_names)
-            try:
-                ret.update(i.grid._arg_names)
-            except AttributeError:
-                pass
-        for d in self.dimensions:
-            ret.update(d._arg_names)
-        ret.update(p.name for p in self.parameters)
-        return frozenset(ret)
-
-    def _autotune(self, args, setup):
-        """Auto-tuning to improve runtime performance."""
-        return args
-
-    def arguments(self, **kwargs):
-        """Arguments to run the Operator."""
-        args = self._prepare_arguments(**kwargs)
-        # Check all arguments are present
-        for p in self.parameters:
-            if args.get(p.name) is None:
-                raise ValueError("No value found for parameter %s" % p.name)
-        return args
-
-    # Code generation and JIT compilation
-
-    # @cached_property
-    # def _soname(self):
-    #     """A unique name for the shared object resulting from JIT compilation."""
-    #     return Signer._digest(self, configuration)
-
-    @cached_property
-    def ccode(self):
-        try:
-            return self._ccode_handler(compiler=self._compiler).visit(self)
-        except (AttributeError, TypeError):
-            from devito.ir.iet.visitors import CGen
-            return CGen(compiler=self._compiler).visit(self)
 
     @property
     def cfunction(self):
@@ -814,134 +376,6 @@ def cfunction(self):
 
         return self._cfunction
 
-    def cinterface(self, force=False):
-        """
-        Generate two files under the prescribed temporary directory:
-
-            * `X.c` (or `X.cpp`): the code generated for this Operator;
-            * `X.h`: an header file representing the interface of `X.c`.
-
-        Where `X=self.name`.
-
-        Parameters
-        ----------
-        force : bool, optional
-            Overwrite any existing files. Defaults to False.
-        """
-        dest = self._compiler.get_jit_dir()
-        name = dest.joinpath(self.name)
-
-        cfile = name.with_suffix(".%s" % self._compiler.src_ext)
-        hfile = name.with_suffix('.h')
-
-        # Generate the .c and .h code
-        ccode, hcode = CInterface().visit(self)
-
-        for f, code in [(cfile, ccode), (hfile, hcode)]:
-            if not force and f.is_file():
-                debug("`%s` was not saved in `%s` as it already exists" % (f.name, dest))
-            else:
-                with open(str(f), 'w') as ff:
-                    ff.write(str(code))
-                debug("`%s` successfully saved in `%s`" % (f.name, dest))
-
-        return ccode, hcode
-
-    # Execution
-
-    def __call__(self, **kwargs):
-        return self.apply(**kwargs)
-
-    def apply(self, **kwargs):
-        """
-        Execute the Operator.
-
-        With no arguments provided, the Operator runs using the data carried by the
-        objects appearing in the input expressions -- these are referred to as the
-        "default arguments".
-
-        Optionally, any of the Operator default arguments may be replaced by passing
-        suitable key-value arguments. Given ``apply(k=v, ...)``, ``(k, v)`` may be
-        used to:
-
-        * replace a Constant. In this case, ``k`` is the name of the Constant,
-          ``v`` is either a Constant or a scalar value.
-
-        * replace a Function (SparseFunction). Here, ``k`` is the name of the
-          Function, ``v`` is either a Function or a numpy.ndarray.
-
-        * alter the iteration interval along a Dimension. Consider a generic
-          Dimension ``d`` iterated over by the Operator.  By default, the Operator
-          runs over all iterations within the compact interval ``[d_m, d_M]``,
-          where ``d_m`` and ``d_M`` are, respectively, the smallest and largest
-          integers not causing out-of-bounds memory accesses (for the Grid
-          Dimensions, this typically implies iterating over the entire physical
-          domain). So now ``k`` can be either ``d_m`` or ``d_M``, while ``v``
-          is an integer value.
-
-        Examples
-        --------
-        Consider the following Operator
-
-        >>> from devito import Eq, Grid, TimeFunction, Operator
-        >>> grid = Grid(shape=(3, 3))
-        >>> u = TimeFunction(name='u', grid=grid, save=3)
-        >>> op = Operator(Eq(u.forward, u + 1))
-
-        The Operator is run by calling ``apply``
-
-        >>> summary = op.apply()
-
-        The variable ``summary`` contains information about runtime performance.
-        As no key-value parameters are specified, the Operator runs with its
-        default arguments, namely ``u=u, x_m=0, x_M=2, y_m=0, y_M=2, time_m=0,
-        time_M=1``.
-
-        At this point, the same Operator can be used for a completely different
-        run, for example
-
-        >>> u2 = TimeFunction(name='u', grid=grid, save=5)
-        >>> summary = op.apply(u=u2, x_m=1, y_M=1)
-
-        Now, the Operator will run with a different set of arguments, namely
-        ``u=u2, x_m=1, x_M=2, y_m=0, y_M=1, time_m=0, time_M=3``.
-
-        To run an Operator that only uses buffered TimeFunctions, the maximum
-        iteration point along the time dimension must be explicitly specified
-        (otherwise, the Operator wouldn't know how many iterations to run).
-
-        >>> u3 = TimeFunction(name='u', grid=grid)
-        >>> op = Operator(Eq(u3.forward, u3 + 1))
-        >>> summary = op.apply(time_M=10)
-        """
-        # Build the arguments list to invoke the kernel function
-        with self._profiler.timer_on('arguments'):
-            args = self.arguments(**kwargs)
-            self._jit_kernel_constants = args
-
-        cfunction = self.cfunction
-        try:
-            # Invoke kernel function with args
-            arg_values = self._construct_cfunction_args(args)
-            with self._profiler.timer_on('apply', comm=args.comm):
-                cfunction(*arg_values)
-        except ctypes.ArgumentError as e:
-            if e.args[0].startswith("argument "):
-                argnum = int(e.args[0][9:].split(':')[0]) - 1
-                newmsg = "error in argument '%s' with value '%s': %s" % (
-                    self.parameters[argnum].name,
-                    arg_values[argnum],
-                    e.args[0])
-                raise ctypes.ArgumentError(newmsg) from e
-            else:
-                raise
-
-        # Post-process runtime arguments
-        self._postprocess_arguments(args, **kwargs)
-
-        # Output summary of performance achieved
-        return self._emit_apply_profiling(args)
-
     def _construct_cfunction_args(self, args, get_types=False):
         """
         Either construct the args for the cfunction, or construct the
@@ -967,331 +401,6 @@ def _construct_cfunction_args(self, args, get_types=False):
         else:
             return things
 
-    def _emit_apply_profiling(self, args):
-        """Produce a performance summary of the profiled sections."""
-        # Rounder to 2 decimal places
-        fround = lambda i: ceil(i * 100) / 100
-
-        elapsed = fround(self._profiler.py_timers['apply'])
-        info("Operator `%s` ran in %.2f s" % (self.name, elapsed))
-
-        summary = self._profiler.summary(args, self._dtype, reduce_over=elapsed)
-
-        if not is_log_enabled_for('PERF'):
-            # Do not waste time
-            return summary
-
-        if summary.globals:
-            # Note that with MPI enabled, the global performance indicators
-            # represent "cross-rank" performance data
-            metrics = []
-
-            v = summary.globals.get('vanilla')
-            if v is not None:
-                metrics.append("OI=%.2f" % fround(v.oi))
-                metrics.append("%.2f GFlops/s" % fround(v.gflopss))
-
-            v = summary.globals.get('fdlike')
-            if v is not None:
-                metrics.append("%.2f GPts/s" % fround(v.gpointss))
-
-            if metrics:
-                perf("Global performance: [%s]" % ', '.join(metrics))
-
-            perf("Local performance:")
-            indent = " "*2
-        else:
-            indent = ""
-
-            if isinstance(self._profiler, AdvancedProfilerVerbose):
-                metrics = []
-
-                v = summary.globals.get('fdlike-nosetup')
-                if v is not None:
-                    metrics.append("%.2f GPts/s" % fround(v.gpointss))
-
-                if metrics:
-                    perf("Global performance <w/o setup>: [%s]" % ', '.join(metrics))
-
-        # Emit local, i.e. "per-rank" performance. Without MPI, this is the only
-        # thing that will be emitted
-        def lower_perfentry(v):
-            if v.gflopss:
-                oi = "OI=%.2f" % fround(v.oi)
-                gflopss = "%.2f GFlops/s" % fround(v.gflopss)
-                gpointss = "%.2f GPts/s" % fround(v.gpointss)
-                return "[%s]" % ", ".join([oi, gflopss, gpointss])
-            elif v.gpointss:
-                gpointss = "%.2f GPts/s" % fround(v.gpointss)
-                return "[%s]" % gpointss
-            else:
-                return ""
-
-        for k, v in summary.items():
-            rank = "[rank%d]" % k.rank if k.rank is not None else ""
-
-            metrics = lower_perfentry(v)
-
-            itershapes = [",".join(str(i) for i in its) for its in v.itershapes]
-            if len(itershapes) > 1:
-                itershapes = ",".join("<%s>" % i for i in itershapes)
-            elif len(itershapes) == 1:
-                itershapes = itershapes[0]
-            else:
-                itershapes = ""
-            name = "%s%s<%s>" % (k.name, rank, itershapes)
-
-            perf("%s* %s ran in %.2f s %s" % (indent, name, fround(v.time), metrics))
-            for n, v1 in summary.subsections.get(k.name, {}).items():
-                metrics = lower_perfentry(v1)
-
-                perf("%s+ %s ran in %.2f s [%.2f%%] %s" %
-                     (indent*2, n, fround(v1.time), fround(v1.time/v.time*100),
-                      metrics))
-
-        # Emit performance mode and arguments
-        perf_args = {}
-        for i in self.input + self.dimensions:
-            if not i.is_PerfKnob:
-                continue
-            try:
-                perf_args[i.name] = args[i.name]
-            except KeyError:
-                # Try with the aliases
-                for a in i._arg_names:
-                    if a in args:
-                        perf_args[a] = args[a]
-                        break
-        perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args))
-
-        return summary
-
-    # Pickling support
-
-    def __getstate__(self):
-        if self._lib:
-            state = dict(self.__dict__)
-            # The compiled shared-object will be pickled; upon unpickling, it
-            # will be restored into a potentially different temporary directory,
-            # so the entire process during which the shared-object is loaded and
-            # given to ctypes must be performed again
-            state['_lib'] = None
-            state['_cfunction'] = None
-            # Do not pickle the `args` used to construct the Operator. Not only
-            # would this be completely useless, but it might also lead to
-            # allocating additional memory upon unpickling, as the user-provided
-            # equations typically carry different instances of the same Function
-            # (e.g., f(t, x-1), f(t, x), f(t, x+1)), which are different objects
-            # with distinct `.data` fields
-            state['_args'] = None
-            with open(self._lib._name, 'rb') as f:
-                state['binary'] = f.read()
-                state['soname'] = self._soname
-            return state
-        else:
-            return self.__dict__
-
-    def __getnewargs_ex__(self):
-        return (None,), {}
-
-    def __setstate__(self, state):
-        soname = state.pop('soname', None)
-        binary = state.pop('binary', None)
-        for k, v in state.items():
-            setattr(self, k, v)
-        if soname is not None:
-            self._compiler.save(soname, binary)
-            self._lib = self._compiler.load(soname)
-            self._lib.name = soname
-
-
-# Default action (perform or bypass) for selected compilation passes upon
-# recursive compilation
-# NOTE: it may not only be pointless to apply the following passes recursively
-# (because once, during the main compilation phase, is simply enough), but also
-# dangerous as some of them (the minority) might break in some circumstances
-# if applied in cascade (e.g., `linearization` on top of `linearization`)
-rcompile_registry = {
-    'mpi': False,
-    'linearize': False,
-    'place-transfers': False
-}
-
-
-def rcompile(expressions, kwargs=None):
-    """
-    Perform recursive compilation on an ordered sequence of symbolic expressions.
-    """
-    if not kwargs or 'options' not in kwargs:
-        kwargs = parse_kwargs(**kwargs)
-        cls = operator_selector(**kwargs)
-        kwargs = cls._normalize_kwargs(**kwargs)
-    else:
-        cls = operator_selector(**kwargs)
-
-    # Tweak the compilation kwargs
-    options = dict(kwargs['options'])
-    options.update(rcompile_registry)
-    kwargs['options'] = options
-
-    # Recursive profiling not supported -- would be a complete mess
-    kwargs.pop('profiler', None)
-
-    return cls._lower(expressions, **kwargs)
-
-
-# Misc helpers
-
-class ArgumentsMap(dict):
-
-    def __init__(self, args, grid, op):
-        super().__init__(args)
-
-        self.grid = grid
-
-        self.allocator = op._allocator
-        self.platform = op._platform
-        # self.language = op._language
-        self.compiler = op._compiler
-        self.options = op._options
-
-    @property
-    def comm(self):
-        """The MPI communicator the arguments are collective over."""
-        return self.grid.comm if self.grid is not None else MPI.COMM_NULL
-
-    @property
-    def opkwargs(self):
-        temp_registry = {v: k for k, v in platform_registry.items()}
-        platform = temp_registry[self.platform]
-
-        temp_registry = {v: k for k, v in compiler_registry.items()}
-        compiler = temp_registry[self.compiler.__class__]
-
-        return {'platform': platform, 'compiler': compiler, 'language': self.language}
-
-
-def parse_kwargs(**kwargs):
-    """
-    Parse keyword arguments provided to an Operator.
-    """
-    # `dse` -- deprecated, dropped
-    dse = kwargs.pop("dse", None)
-    if dse is not None:
-        warning("The `dse` argument is deprecated. "
-                "The optimization level is now controlled via the `opt` argument")
-
-    # `dle` -- deprecated, replaced by `opt`
-    if 'dle' in kwargs:
-        warning("The `dle` argument is deprecated. "
-                "The optimization level is now controlled via the `opt` argument")
-        dle = kwargs.pop('dle')
-        if 'opt' in kwargs:
-            warning("Both `dle` and `opt` were passed; ignoring `dle` argument")
-            opt = kwargs.pop('opt')
-        else:
-            warning("Setting `opt=%s`" % str(dle))
-            opt = dle
-    elif 'opt' in kwargs:
-        opt = kwargs.pop('opt')
-    else:
-        opt = configuration['opt']
-
-    if not opt or isinstance(opt, str):
-        mode, options = opt, {}
-    elif isinstance(opt, tuple):
-        if len(opt) == 0:
-            mode, options = 'noop', {}
-        elif isinstance(opt[-1], dict):
-            if len(opt) == 2:
-                mode, options = opt
-            else:
-                mode, options = tuple(flatten(i.split(',') for i in opt[:-1])), opt[-1]
-        else:
-            mode, options = tuple(flatten(i.split(',') for i in opt)), {}
-    else:
-        raise InvalidOperator("Illegal `opt=%s`" % str(opt))
-
-    # `opt`, deprecated kwargs
-    kwopenmp = kwargs.get('openmp', options.get('openmp'))
-    if kwopenmp is None:
-        openmp = kwargs.get('language', configuration['language']) == 'openmp'
-    else:
-        openmp = kwopenmp
-
-    # `opt`, options
-    options = dict(options)
-    options.setdefault('openmp', openmp)
-    options.setdefault('mpi', configuration['mpi'])
-    for k, v in configuration['opt-options'].items():
-        options.setdefault(k, v)
-    # Handle deprecations
-    deprecated_options = ('cire-mincost-inv', 'cire-mincost-sops', 'cire-maxalias')
-    for i in deprecated_options:
-        try:
-            options.pop(i)
-            warning("Ignoring deprecated optimization option `%s`" % i)
-        except KeyError:
-            pass
-    kwargs['options'] = options
-
-    # `opt`, mode
-    if mode is None:
-        mode = 'noop'
-    kwargs['mode'] = mode
-
-    # `platform`
-    platform = kwargs.get('platform')
-    if platform is not None:
-        if not isinstance(platform, str):
-            raise ValueError("Argument `platform` should be a `str`")
-        if platform not in configuration._accepted['platform']:
-            raise InvalidOperator("Illegal `platform=%s`" % str(platform))
-        kwargs['platform'] = platform_registry[platform]()
-    else:
-        kwargs['platform'] = configuration['platform']
-
-    # `language`
-    language = kwargs.get('language')
-    if language is not None:
-        if not isinstance(language, str):
-            raise ValueError("Argument `language` should be a `str`")
-        if language not in configuration._accepted['language']:
-            raise InvalidOperator("Illegal `language=%s`" % str(language))
-        kwargs['language'] = language
-    elif kwopenmp is not None:
-        # Handle deprecated `openmp` kwarg for backward compatibility
-        kwargs['language'] = 'openmp' if openmp else 'C'
-    else:
-        kwargs['language'] = configuration['language']
-
-    # `compiler`
-    compiler = kwargs.get('compiler')
-    if compiler is not None:
-        if not isinstance(compiler, str):
-            raise ValueError("Argument `compiler` should be a `str`")
-        if compiler not in configuration._accepted['compiler']:
-            raise InvalidOperator("Illegal `compiler=%s`" % str(compiler))
-        kwargs['compiler'] = compiler_registry[compiler](platform=kwargs['platform'],
-                                                         language=kwargs['language'],
-                                                         mpi=configuration['mpi'])
-    elif any([platform, language]):
-        kwargs['compiler'] =\
-            configuration['compiler'].__new_with__(platform=kwargs['platform'],
-                                                   language=kwargs['language'],
-                                                   mpi=configuration['mpi'])
-    else:
-        kwargs['compiler'] = configuration['compiler'].__new_with__()
-
-    # `allocator`
-    kwargs['allocator'] = default_allocator(
-        '%s.%s.%s' % (kwargs['compiler'].name,
-                      kwargs['language'],
-                      kwargs['platform'])
-    )
-
-    return kwargs
-
 
 def get_arg_names_from_module(op):
     return [

From 97faa0762bc6d2d387b0d13ecc8dcbf437ed1d0b Mon Sep 17 00:00:00 2001
From: George Bisbas <g.bisbas18@imperial.ac.uk>
Date: Tue, 28 Nov 2023 18:12:19 +0000
Subject: [PATCH 6/6] cleanup: drop more obsolete code

---
 devito/operator/xdsl_operator.py | 69 ++++++++++++--------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py
index 7eddd7d30f..7166eac3c0 100644
--- a/devito/operator/xdsl_operator.py
+++ b/devito/operator/xdsl_operator.py
@@ -1,41 +1,25 @@
 import os
 import subprocess
-import ctypes
 import tempfile
 
-from math import ceil
 from collections import OrderedDict
 from io import StringIO
-from operator import attrgetter
 
-from cached_property import cached_property
 
 from devito import Operator
-from devito.arch import compiler_registry, platform_registry
-from devito.data import default_allocator
 from devito.exceptions import InvalidOperator
-from devito.ir.clusters import ClusterGroup, clusterize
-from devito.ir.equations import LoweredEq, lower_exprs
-from devito.ir.iet import (Callable, CInterface, EntryFunction, FindSymbols, MetaCall,
-                           derive_parameters, iet_build)
+
+from devito.ir.iet import Callable, MetaCall
 from devito.ir.ietxdsl import (finalize_module_with_globals)
-from devito.ir.stree import stree_build
-from devito.ir.support import AccessMode, SymbolRegistry
+from devito.ir.support import SymbolRegistry
 from devito.ir.ietxdsl.cluster_to_ssa import (ExtractDevitoStencilConversion,
                                               convert_devito_stencil_to_xdsl_stencil)
-from devito.logger import debug, info, perf, warning, is_log_enabled_for
+from devito.logger import debug, info, perf
 from devito.operator.operator import IRs
-from devito.operator.profiling import AdvancedProfilerVerbose, create_profile
-from devito.operator.registry import operator_selector
-from devito.parameters import configuration
-from devito.passes import (Graph, lower_index_derivatives, generate_implicit,
-                           generate_macros, minimize_symbols, unevaluate)
+from devito.operator.profiling import create_profile
 from devito.passes.iet import CTarget
-from devito.symbolics import estimate_cost
-from devito.tools import (DAG, ReducerMap, as_tuple, flatten,
-                          filter_sorted, frozendict, is_integer, split, timed_pass,
-                          contains_val)
-from devito.types import Evaluable, TimeFunction, Grid
+from devito.tools import as_tuple, flatten, filter_sorted
+from devito.types import Evaluable, TimeFunction
 from devito.types.mlir_types import ptr_of, f32
 from devito.mpi import MPI
 
@@ -48,25 +32,6 @@
 __all__ = ['XDSLOperator']
 
 
-# small interop shim script for stuff that we don't want to implement in mlir-ir
-_INTEROP_C = """
-#include <time.h>
-
-double timer_start() {
-  // return a number representing the current point in time
-  // it might be offset by a fixed ammount
-  struct timespec t;
-  clock_gettime(CLOCK_MONOTONIC, &t);
-  return (t.tv_sec) + (t.tv_nsec * 1e-9);
-}
-
-double timer_end(double start) {
-  // return time elaspes since start in seconds
-  return (timer_start() - start);
-}
-"""
-
-
 class XDSLOperator(Operator):
 
     _Target = CTarget
@@ -357,7 +322,6 @@ def _lower(cls, expressions, **kwargs):
 
         return IRs(expressions, clusters, stree, uiet, iet), byproduct, module
 
-
     @property
     def cfunction(self):
         """The JIT-compiled C function as a ctypes.FuncPtr object."""
@@ -406,3 +370,22 @@ def get_arg_names_from_module(op):
     return [
         str_attr.data for str_attr in op.body.block.ops.first.attributes['param_names'].data  # noqa
     ]
+
+
+# Small C interop shim for functionality we don't want to implement in MLIR IR
+_INTEROP_C = """
+#include <time.h>
+
+double timer_start() {
+  // return a number representing the current point in time
+  // it might be offset by a fixed amount
+  struct timespec t;
+  clock_gettime(CLOCK_MONOTONIC, &t);
+  return (t.tv_sec) + (t.tv_nsec * 1e-9);
+}
+
+double timer_end(double start) {
+  // return time elapsed since start in seconds
+  return (timer_start() - start);
+}
+"""