From 6146b87880efca1f0ec85521052be37e1120cc5a Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Fri, 24 Nov 2023 19:55:05 +0000 Subject: [PATCH 1/6] compiler: Simplify --- devito/ir/ietxdsl/cluster_to_ssa.py | 40 +++++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py index 9bf155d206..4624aec01c 100644 --- a/devito/ir/ietxdsl/cluster_to_ssa.py +++ b/devito/ir/ietxdsl/cluster_to_ssa.py @@ -11,7 +11,7 @@ # ------------- devito imports -------------# from devito import Grid, SteppingDimension from devito.ir.equations import LoweredEq -from devito.symbolics import retrieve_indexed +from devito.symbolics import retrieve_indexed, retrieve_function_carriers from devito.logger import perf # ------------- devito-xdsl SSA imports -------------# @@ -50,26 +50,26 @@ def _convert_eq(self, eq: LoweredEq): function = eq.lhs.function mlir_type = dtypes_to_xdsltypes[function.dtype] grid: Grid = function.grid - # get the halo of the space dimensions only e.g [(2, 2), (2, 2)] for the 2d case + + # Get the halo of the grid.dimensions + # e.g [(2, 2), (2, 2)] for the 2D case # Do not forget the issue with Devito adding an extra point! + # Check 'def halo_setup' for more # (for derivative regions) - halo = [function.halo[function.dimensions.index(d)] for d in grid.dimensions] - + halo = [function.halo[d] for d in grid.dimensions] + # Shift all time values so that for all accesses at t + n, n>=0. 
self.time_offs = min( int(idx.indices[0] - grid.stepping_dim) for idx in retrieve_indexed(eq) ) - # Calculate the actual size of our time dimension - actual_time_size = ( - max(int(idx.indices[0] - grid.stepping_dim) for idx in retrieve_indexed(eq)) - - self.time_offs - + 1 - ) - + + # Get the time_size + time_size = max(d.function.time_size for d in retrieve_function_carriers(eq)) + # Build the for loop perf("Build Time Loop") - loop = self._build_iet_for(grid.stepping_dim, actual_time_size) + loop = self._build_iet_for(grid.stepping_dim, time_size) # build stencil perf("Initialize a stencil Op") @@ -77,7 +77,7 @@ def _convert_eq(self, eq: LoweredEq): loop.subindice_ssa_vals(), grid.shape_local, halo, - actual_time_size, + time_size, mlir_type, eq.lhs.function._C_name, ) @@ -87,7 +87,7 @@ def _convert_eq(self, eq: LoweredEq): # dims -> ssa vals perf("Apply time offsets") time_offset_to_field: dict[str, SSAValue] = { - i: stencil_op.block.args[i] for i in range(actual_time_size - 1) + i: stencil_op.block.args[i] for i in range(time_size - 1) } # reset loaded values @@ -103,8 +103,10 @@ def _convert_eq(self, eq: LoweredEq): # emit return offsets = _get_dim_offsets(eq.lhs, self.time_offs) + # import pdb;pdb.set_trace() + assert ( - offsets[0] == actual_time_size - 1 + offsets[0] == time_size - 1 ), "result should be written to last time buffer" assert all( o == 0 for o in offsets[1:] @@ -153,8 +155,10 @@ def _visit_math_nodes(self, node: Expr) -> SSAValue: # select the correct op from arith.addi, arith.addf, arith.muli, arith.mulf if isinstance(carry.type, builtin.IntegerType): op_cls = arith.Addi if isinstance(node, Add) else arith.Muli - else: + elif isinstance(carry.type, builtin.Float32Type): op_cls = arith.Addf if isinstance(node, Add) else arith.Mulf + else: + raise("Add support for another type") for arg in args: op = op_cls(carry, arg) @@ -202,10 +206,11 @@ def _add_access_ops( """ # get the compile time constant offsets for this read offsets = 
_get_dim_offsets(read, self.time_offs) + if offsets in self.loaded_values: continue - # assume time dimension is first dimension + # Assume time dimension is first dimension t_offset = offsets[0] space_offsets = offsets[1:] @@ -280,6 +285,7 @@ def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple: # shift all time values so that for all accesses at t + n, n>=0. # time_offs = min(int(i - d) for i, d in zip(idx.indices, idx.function.dimensions)) halo = ((t_offset, 0), *idx.function.halo[1:]) + try: return tuple( int(i - d - halo_offset) From c4bef2d4e4ad15640836b6ae713d269cf333c91c Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 28 Nov 2023 12:26:40 +0000 Subject: [PATCH 2/6] compiler: Drop more code --- devito/ir/ietxdsl/__init__.py | 6 ++-- devito/ir/ietxdsl/cluster_to_ssa.py | 9 ++---- devito/ir/ietxdsl/lowering.py | 43 ----------------------------- devito/ir/ietxdsl/utils.py | 10 +++++++ 4 files changed, 16 insertions(+), 52 deletions(-) create mode 100644 devito/ir/ietxdsl/utils.py diff --git a/devito/ir/ietxdsl/__init__.py b/devito/ir/ietxdsl/__init__.py index 2ba899e861..96bddfce7b 100644 --- a/devito/ir/ietxdsl/__init__.py +++ b/devito/ir/ietxdsl/__init__.py @@ -1,2 +1,4 @@ -from devito.ir.ietxdsl.lowering import LowerIetForToScfFor, LowerIetForToScfParallel, DropIetComments, iet_to_standard_mlir # noqa -from devito.ir.ietxdsl.cluster_to_ssa import finalize_module_with_globals, convert_devito_stencil_to_xdsl_stencil # noqa +from devito.ir.ietxdsl.lowering import (LowerIetForToScfFor, LowerIetForToScfParallel, + iet_to_standard_mlir) # noqa +from devito.ir.ietxdsl.cluster_to_ssa import (finalize_module_with_globals, + convert_devito_stencil_to_xdsl_stencil) # noqa diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py index 4624aec01c..4947b416ed 100644 --- a/devito/ir/ietxdsl/cluster_to_ssa.py +++ b/devito/ir/ietxdsl/cluster_to_ssa.py @@ -16,8 +16,10 @@ # ------------- devito-xdsl SSA imports -------------# from 
devito.ir.ietxdsl import iet_ssa +from devito.ir.ietxdsl.utils import is_int, is_float from devito.ir.ietxdsl.ietxdsl_functions import dtypes_to_xdsltypes + # flake8: noqa class ExtractDevitoStencilConversion: @@ -297,13 +299,6 @@ def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple: raise ValueError("Indices must be constant offset from dimension!") from ex -def is_int(val: SSAValue): - return isinstance(val.type, builtin.IntegerType) - - -def is_float(val: SSAValue): - return val.type in (builtin.f32, builtin.f64) - # -------------------------------------------------------- #### # #### diff --git a/devito/ir/ietxdsl/lowering.py b/devito/ir/ietxdsl/lowering.py index 6505f1d234..e7e0e5bb81 100644 --- a/devito/ir/ietxdsl/lowering.py +++ b/devito/ir/ietxdsl/lowering.py @@ -8,36 +8,6 @@ GreedyRewritePatternApplier, op_type_rewrite_pattern) -def _generate_subindices(subindices: int, block: Block, - rewriter: PatternRewriter): - # keep track of the what argument we should replace with what - arg_changes: list[tuple[SSAValue, SSAValue]] = [] - - # keep track of the ops we want to insert - modulo = arith.Constant.from_int_and_width(subindices, builtin.i64) - new_ops = [modulo] - - # generate the new indices - for i in range(subindices): - offset = arith.Constant.from_int_and_width(i, builtin.i64) - index_off = arith.Addi(block.args[0], offset) - index = arith.RemSI(index_off, modulo) - - new_ops += [ - offset, - index_off, - index, - ] - # replace block.args[i+1] with (arg0 + i) % n - arg_changes.append((block.args[i + 1], index.result)) - - rewriter.insert_op_at_start(new_ops, block) - - for old, new in arg_changes: - old.replace_by(new) - block.erase_arg(old) - - class ConvertScfForArgsToIndex(RewritePattern): @op_type_rewrite_pattern def match_and_rewrite(self, op: scf.For, rewriter: PatternRewriter, /): @@ -210,19 +180,6 @@ def recurse_scf_parallel( return lbs, ubs, steps, body -class DropIetComments(RewritePattern): - """ - This drops all iet.comment operations 
- - TODO: convert iet.comment ops that have timer info into their own nodes - """ - - @op_type_rewrite_pattern - def match_and_rewrite(self, op: iet_ssa.Statement, - rewriter: PatternRewriter, /): - rewriter.erase_matched_op() - - @dataclass class LowerIetPointerCastAndDataObject(RewritePattern): dimensions: list[SSAValue] = field(default_factory=list) diff --git a/devito/ir/ietxdsl/utils.py b/devito/ir/ietxdsl/utils.py new file mode 100644 index 0000000000..6d413c8898 --- /dev/null +++ b/devito/ir/ietxdsl/utils.py @@ -0,0 +1,10 @@ +from xdsl.dialects import builtin +from xdsl.ir import SSAValue + + +def is_int(val: SSAValue): + return isinstance(val.type, builtin.IntegerType) + + +def is_float(val: SSAValue): + return val.type in (builtin.f32, builtin.f64) From b2b1aba112fddf0bef3e076e80b45aa8f7821c09 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 28 Nov 2023 16:40:05 +0000 Subject: [PATCH 3/6] cleanup: Drop useless rewrite and restructure --- devito/core/cpu.py | 29 +++++++ devito/ir/ietxdsl/__init__.py | 7 +- devito/ir/ietxdsl/cluster_to_ssa.py | 11 +-- devito/ir/ietxdsl/lowering.py | 12 +-- devito/operator/xdsl_operator.py | 123 ++++++++-------------------- tests/test_xdsl_base.py | 36 ++++++++ 6 files changed, 117 insertions(+), 101 deletions(-) diff --git a/devito/core/cpu.py b/devito/core/cpu.py index 7137c08b06..15a60768a7 100644 --- a/devito/core/cpu.py +++ b/devito/core/cpu.py @@ -311,3 +311,32 @@ class Cpu64FsgCOperator(Cpu64FsgOperator): class Cpu64FsgOmpOperator(Cpu64FsgOperator): _Target = OmpTarget + + +# -----------XDSL +# This is a collection of xDSL optimization pipelines +# Ideally they should follow the same type of subclassing as the rest of +# the Devito Operators + + +MLIR_CPU_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion, cse, canonicalize, fold-memref-alias-ops, expand-strided-metadata, loop-invariant-code-motion, lower-affine, convert-scf-to-cf, 
convert-math-to-llvm, convert-func-to-llvm{use-bare-ptr-memref-call-conv}, finalize-memref-to-llvm, canonicalize, cse)"' # noqa + +MLIR_OPENMP_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,finalize-memref-to-llvm,loop-invariant-code-motion,canonicalize,cse,convert-scf-to-openmp,finalize-memref-to-llvm,convert-scf-to-cf,convert-func-to-llvm{use-bare-ptr-memref-call-conv},convert-openmp-to-llvm,convert-math-to-llvm,reconcile-unrealized-casts,canonicalize,cse)"' # noqa +# gpu-launch-sink-index-computations seemed to have no impact +MLIR_GPU_PIPELINE = lambda block_sizes: f'"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,lower-affine, canonicalize,cse, fold-memref-alias-ops, gpu-launch-sink-index-computations, gpu-kernel-outlining, canonicalize{{region-simplify}},cse,fold-memref-alias-ops,expand-strided-metadata,lower-affine,canonicalize,cse,func.func(gpu-async-region),canonicalize,cse,convert-arith-to-llvm{{index-bitwidth=64}},convert-scf-to-cf,convert-cf-to-llvm{{index-bitwidth=64}},canonicalize,cse,convert-func-to-llvm{{use-bare-ptr-memref-call-conv}},gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' # noqa + +XDSL_CPU_PIPELINE = lambda nb_tiled_dims: f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},printf-to-llvm"' # noqa + +XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm" # noqa + +XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims: 
f'"dmp-decompose{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' # noqa + + +def generate_tiling_arg(nb_tiled_dims: int): + """ + Generate the tile-sizes arg for the convert-stencil-to-ll-mlir pass. + Generating no argument if the nb_tiled_dims arg is 0 + """ + if nb_tiled_dims == 0: + return '' + return "tile-sizes=" + ",".join(["64"]*nb_tiled_dims) diff --git a/devito/ir/ietxdsl/__init__.py b/devito/ir/ietxdsl/__init__.py index 96bddfce7b..24065c3581 100644 --- a/devito/ir/ietxdsl/__init__.py +++ b/devito/ir/ietxdsl/__init__.py @@ -1,4 +1,5 @@ -from devito.ir.ietxdsl.lowering import (LowerIetForToScfFor, LowerIetForToScfParallel, - iet_to_standard_mlir) # noqa +from devito.ir.ietxdsl.lowering import (LowerIetForToScfFor, LowerIetForToScfParallel) from devito.ir.ietxdsl.cluster_to_ssa import (finalize_module_with_globals, - convert_devito_stencil_to_xdsl_stencil) # noqa + convert_devito_stencil_to_xdsl_stencil) + +# flake8: noqa diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py index 4947b416ed..a452919a82 100644 --- a/devito/ir/ietxdsl/cluster_to_ssa.py +++ b/devito/ir/ietxdsl/cluster_to_ssa.py @@ -59,13 +59,13 @@ def _convert_eq(self, eq: LoweredEq): # Check 'def halo_setup' for more # (for derivative regions) halo = [function.halo[d] for d in grid.dimensions] - + # Shift all time values so that for all accesses at t + n, n>=0. 
self.time_offs = min( int(idx.indices[0] - grid.stepping_dim) for idx in retrieve_indexed(eq) ) - + # Get the time_size time_size = max(d.function.time_size for d in retrieve_function_carriers(eq)) @@ -105,7 +105,6 @@ def _convert_eq(self, eq: LoweredEq): # emit return offsets = _get_dim_offsets(eq.lhs, self.time_offs) - # import pdb;pdb.set_trace() assert ( offsets[0] == time_size - 1 @@ -546,8 +545,10 @@ def finalize_module_with_globals(module: builtin.ModuleOp, known_symbols: dict[s _InsertSymbolicConstants(known_symbols), _LowerLoadSymbolidToFuncArgs(), ] - grpa = GreedyRewritePatternApplier(patterns) - PatternRewriteWalker(grpa).rewrite_module(module) + rewriter = GreedyRewritePatternApplier(patterns) + PatternRewriteWalker(rewriter).rewrite_module(module) + + # GPU boilerplate if gpu_boilerplate: walker = PatternRewriteWalker(GreedyRewritePatternApplier([WrapFunctionWithTransfers('apply_kernel')])) walker.rewrite_module(module) diff --git a/devito/ir/ietxdsl/lowering.py b/devito/ir/ietxdsl/lowering.py index e7e0e5bb81..a74e7ae908 100644 --- a/devito/ir/ietxdsl/lowering.py +++ b/devito/ir/ietxdsl/lowering.py @@ -260,15 +260,16 @@ def match_and_rewrite(self, op: func.FuncOp, rewriter: PatternRewriter, /): iet_ssa.Dataobj.get_llvm_struct_type(), ) elif isinstance(arg_typ, iet_ssa.Profiler): op.body.blocks[0].args[i].type = llvm.LLVMPointerType.opaque() - recalc_func_type(op) def recalc_func_type(op: func.FuncOp): - op.attributes['function_type'] = builtin.FunctionType.from_lists( - [arg.type for arg in op.body.blocks[0].args], - op.function_type.outputs.data, - ) + # Only if blocks exist + if op.body.blocks: + op.attributes['function_type'] = builtin.FunctionType.from_lists( + [arg.type for arg in op.body.blocks[0].args], + op.function_type.outputs.data, + ) @dataclass @@ -369,7 +370,6 @@ def iet_to_standard_mlir(module: builtin.ModuleOp): LowerIetForToScfFor(), ConvertScfForArgsToIndex(), ConvertScfParallelArgsToIndex(), - DropIetComments(), 
CleanupDanglingIetDatatypes(), ptr_lower := LowerIetPointerCastAndDataObject(), LowerMemrefLoadToLLvmPointer(ptr_lower), diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index 0e4e059b8d..4c9e3e036b 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -4,7 +4,7 @@ import tempfile from math import ceil -from collections import OrderedDict, namedtuple +from collections import OrderedDict from io import StringIO from operator import attrgetter @@ -26,12 +26,13 @@ from devito.logger import debug, info, perf, warning, is_log_enabled_for from devito.operator.operator import IRs from devito.operator.profiling import AdvancedProfilerVerbose, create_profile +from devito.operator.registry import operator_selector from devito.parameters import configuration from devito.passes import (Graph, lower_index_derivatives, generate_implicit, generate_macros, minimize_symbols, unevaluate) from devito.passes.iet import CTarget from devito.symbolics import estimate_cost -from devito.tools import (DAG, OrderedSet, ReducerMap, as_tuple, flatten, +from devito.tools import (DAG, ReducerMap, as_tuple, flatten, filter_sorted, frozendict, is_integer, split, timed_pass, contains_val) from devito.types import Evaluable, TimeFunction, Grid @@ -40,7 +41,9 @@ from xdsl.printer import Printer -# flake8: noqa + +from devito.core.cpu import (MLIR_CPU_PIPELINE, XDSL_CPU_PIPELINE, XDSL_MPI_PIPELINE, + MLIR_OPENMP_PIPELINE, XDSL_GPU_PIPELINE, MLIR_GPU_PIPELINE) __all__ = ['XDSLOperator'] @@ -64,27 +67,6 @@ """ -def generate_tiling_arg(nb_tiled_dims: int): - """ - Generate the tile-sizes arg for the convert-stencil-to-ll-mlir pass. 
Generating no argument if the diled_dims arg is 0 - """ - if nb_tiled_dims == 0: - return '' - return "tile-sizes=" + ",".join(["64"]*nb_tiled_dims) - - -CFLAGS = "-O3 -march=native -mtune=native -lmlir_c_runner_utils" - -MLIR_CPU_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,convert-scf-to-cf,convert-math-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv},finalize-memref-to-llvm,canonicalize,cse)"' -MLIR_OPENMP_PIPELINE = '"builtin.module(canonicalize, cse, loop-invariant-code-motion, canonicalize, cse, loop-invariant-code-motion,cse,canonicalize,fold-memref-alias-ops,expand-strided-metadata, loop-invariant-code-motion,lower-affine,finalize-memref-to-llvm,loop-invariant-code-motion,canonicalize,cse,convert-scf-to-openmp,finalize-memref-to-llvm,convert-scf-to-cf,convert-func-to-llvm{use-bare-ptr-memref-call-conv},convert-openmp-to-llvm,convert-math-to-llvm,reconcile-unrealized-casts,canonicalize,cse)"' -# gpu-launch-sink-index-computations seemed to have no impact -MLIR_GPU_PIPELINE = lambda block_sizes: f'"builtin.module(test-math-algebraic-simplification,scf-parallel-loop-tiling{{parallel-loop-tile-sizes={block_sizes}}},func.func(gpu-map-parallel-loops),convert-parallel-loops-to-gpu,lower-affine, canonicalize,cse, fold-memref-alias-ops, gpu-launch-sink-index-computations, gpu-kernel-outlining, canonicalize{{region-simplify}},cse,fold-memref-alias-ops,expand-strided-metadata,lower-affine,canonicalize,cse,func.func(gpu-async-region),canonicalize,cse,convert-arith-to-llvm{{index-bitwidth=64}},convert-scf-to-cf,convert-cf-to-llvm{{index-bitwidth=64}},canonicalize,cse,convert-func-to-llvm{{use-bare-ptr-memref-call-conv}},gpu.module(convert-gpu-to-nvvm,reconcile-unrealized-casts,canonicalize,gpu-to-cubin),gpu-to-llvm,canonicalize,cse)"' - -XDSL_CPU_PIPELINE = lambda nb_tiled_dims: 
f'"stencil-shape-inference,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},printf-to-llvm"' -XDSL_GPU_PIPELINE = "stencil-shape-inference,convert-stencil-to-ll-mlir{target=gpu},reconcile-unrealized-casts,printf-to-llvm" -XDSL_MPI_PIPELINE = lambda decomp, nb_tiled_dims: f'"dmp-decompose{decomp},canonicalize-dmp,convert-stencil-to-ll-mlir{{{generate_tiling_arg(nb_tiled_dims)}}},dmp-to-mpi{{mpi_init=false}},lower-mpi,printf-to-llvm"' - - class XDSLOperator(Operator): _Target = CTarget @@ -116,6 +98,7 @@ def _make_interop_o(self): @property def mpi_shape(self) -> tuple: + # TODO: move it elsewhere dist = self.functions[0].grid.distributor # reverse topology for row->column major @@ -128,7 +111,7 @@ def _jit_compile(self): once per Operator, reagardless of how many times this method is invoked. """ - + with self._profiler.timer_on('jit-compile'): is_mpi = MPI.Is_initialized() is_gpu = os.environ.get("DEVITO_PLATFORM", None) == 'nvidiaX' @@ -149,21 +132,19 @@ def _jit_compile(self): Printer(stream=module_str).print(self._module) module_str = module_str.getvalue() - to_tile = len(list(filter(lambda s: str(s) in ["x", "y", "z"], self.dimensions)))-1 + to_tile = len(list(filter(lambda d: d.is_Space, self.dimensions)))-1 xdsl_pipeline = XDSL_CPU_PIPELINE(to_tile) mlir_pipeline = MLIR_CPU_PIPELINE - block_sizes: list[int] = [min(target, self._jit_kernel_constants.get(f"{dim}_size", 1)) for target, dim in zip([32, 4, 8], ["x", "y", "z"])] - block_sizes = ','.join(str(bs) for bs in block_sizes) - if is_omp: mlir_pipeline = MLIR_OPENMP_PIPELINE if is_mpi: shape, mpi_rank = self.mpi_shape # Run with restrict domain=false so we only introduce the swaps but don't - # reduce the domain of the computation (as devito has already done that for us) + # reduce the domain of the computation + # (as devito has already done that for us) slices = ','.join(str(x) for x in shape) decomp = "2d-grid" if len(shape) == 2 else "3d-grid" @@ -172,6 +153,9 @@ def 
_jit_compile(self): xdsl_pipeline = XDSL_MPI_PIPELINE(decomp, to_tile) elif is_gpu: xdsl_pipeline = XDSL_GPU_PIPELINE + # Get GPU blocking shapes + block_sizes: list[int] = [min(target, self._jit_kernel_constants.get(f"{dim}_size", 1)) for target, dim in zip([32, 4, 8], ["x", "y", "z"])] # noqa + block_sizes = ','.join(str(bs) for bs in block_sizes) mlir_pipeline = MLIR_GPU_PIPELINE(block_sizes) # allow jit backdooring to provide your own xdsl code @@ -190,8 +174,9 @@ def _jit_compile(self): source_file.close() # Compile IR using xdsl-opt | mlir-opt | mlir-translate | clang + cflags = "-O3 -march=native -mtune=native -lmlir_c_runner_utils" + try: - cflags = CFLAGS cc = "clang" if is_mpi: @@ -208,15 +193,12 @@ def _jit_compile(self): xdsl_cmd = f'xdsl-opt {source_name} -p {xdsl_pipeline}' mlir_cmd = f'mlir-opt -p {mlir_pipeline}' mlir_translate_cmd = 'mlir-translate --mlir-to-llvmir' - clang_cmd = f'{cc} {cflags} -shared -o {self._tf.name} {self._interop_tf.name} -xir -' + clang_cmd = f'{cc} {cflags} -shared -o {self._tf.name} {self._interop_tf.name} -xir -' # noqa - - comp_steps = [ - xdsl_cmd, + comp_steps = [xdsl_cmd, mlir_cmd, mlir_translate_cmd, - clang_cmd - ] + clang_cmd] # Execute each command and store the outputs outputs = [] @@ -231,7 +213,7 @@ def _jit_compile(self): 'stdout': stdout, 'stderr': stderr }) - + except Exception as ex: print("error") raise ex @@ -241,9 +223,10 @@ def _jit_compile(self): perf("XDSLOperator `%s` jit-compiled `%s` in %.2f s with `mlir-opt`" % (self.name, source_name, elapsed)) - def _cmd_compile(self, cmd, input=None): - stdin = subprocess.PIPE if input is not None else None + + # Could be dropped unless PIPE is never empty in the future + stdin = subprocess.PIPE if input is not None else None # noqa res = subprocess.run( cmd, @@ -271,11 +254,12 @@ def setup_memref_args(self): """ args = dict() for arg in self.functions: + # For every TimeFunction add memref if isinstance(arg, TimeFunction): - data = arg._data_allocated - # 
iterate over the first dimension (time) + data = arg._data for t in range(data.shape[0]): args[f'{arg._C_name}_{t}'] = data[t, ...].ctypes.data_as(ptr_of(f32)) + self._jit_kernel_constants.update(args) @classmethod @@ -352,7 +336,6 @@ def _lower(cls, expressions, **kwargs): """ # Create a symbol registry kwargs['sregistry'] = SymbolRegistry() - expressions = as_tuple(expressions) # Input check @@ -800,10 +783,10 @@ def arguments(self, **kwargs): # Code generation and JIT compilation - #@cached_property - #def _soname(self): - # """A unique name for the shared object resulting from JIT compilation.""" - # return Signer._digest(self, configuration) + # @cached_property + # def _soname(self): + # """A unique name for the shared object resulting from JIT compilation.""" + # return Signer._digest(self, configuration) @cached_property def ccode(self): @@ -825,7 +808,9 @@ def cfunction(self): if self._cfunction is None: self._cfunction = getattr(self._lib, "apply_kernel") # Associate a C type to each argument for runtime type check - self._cfunction.argtypes = self._construct_cfunction_args(self._jit_kernel_constants, get_types=True) + argtypes = self._construct_cfunction_args(self._jit_kernel_constants, + get_types=True) + self._cfunction.argtypes = argtypes return self._cfunction @@ -957,7 +942,7 @@ def apply(self, **kwargs): # Output summary of performance achieved return self._emit_apply_profiling(args) - def _construct_cfunction_args(self, args, get_types = False): + def _construct_cfunction_args(self, args, get_types=False): """ Either construct the args for the cfunction, or construct the arg types for it. 
@@ -965,7 +950,7 @@ def _construct_cfunction_args(self, args, get_types = False): ps = { p._C_name: p._C_ctype for p in self.parameters } - + things = [] things_types = [] @@ -982,37 +967,6 @@ def _construct_cfunction_args(self, args, get_types = False): else: return things - def _emit_build_profiling(self): - if not is_log_enabled_for('PERF'): - return - - # Rounder to K decimal places - fround = lambda i, n=100: ceil(i * n) / n - - timings = self._profiler.py_timers.copy() - - tot = timings.pop('op-compile') - perf("Operator `%s` generated in %.2f s" % (self.name, fround(tot))) - - max_hotspots = 3 - threshold = 20. - - def _emit_timings(timings, indent=''): - timings.pop('total', None) - entries = sorted(timings, key=lambda i: timings[i]['total'], reverse=True) - for i in entries[:max_hotspots]: - v = fround(timings[i]['total']) - perc = fround(v/tot*100, n=10) - if perc > threshold: - perf("%s%s: %.2f s (%.1f %%)" % (indent, i.lstrip('_'), v, perc)) - _emit_timings(timings[i], ' '*len(indent) + ' * ') - - _emit_timings(timings, ' * ') - - if self._profiler._ops: - ops = ['%d --> %d' % i for i in self._profiler._ops] - perf("Flops reduction after symbolic optimization: [%s]" % ' ; '.join(ops)) - def _emit_apply_profiling(self, args): """Produce a performance summary of the profiled sections.""" # Rounder to 2 decimal places @@ -1188,10 +1142,6 @@ def rcompile(expressions, kwargs=None): # Misc helpers - -IRs = namedtuple('IRs', 'expressions clusters stree uiet iet') - - class ArgumentsMap(dict): def __init__(self, args, grid, op): @@ -1345,6 +1295,5 @@ def parse_kwargs(**kwargs): def get_arg_names_from_module(op): return [ - str_attr.data - for str_attr in op.body.block.ops.first.attributes['param_names'].data + str_attr.data for str_attr in op.body.block.ops.first.attributes['param_names'].data # noqa ] diff --git a/tests/test_xdsl_base.py b/tests/test_xdsl_base.py index 6953720a72..090bcbf9d9 100644 --- a/tests/test_xdsl_base.py +++ b/tests/test_xdsl_base.py @@ 
-213,3 +213,39 @@ def test_acoustic_3D(shape, so, to, nt): xdsl_norm = norm(u) assert np.isclose(devito_norm, xdsl_norm, rtol=1e-04).all() + + +@pytest.mark.parametrize('shape', [(21, 21, 21)]) +@pytest.mark.parametrize('so', [2, 4]) +@pytest.mark.parametrize('to', [2]) +@pytest.mark.parametrize('nt', [20]) +def test_standard_mlir_rewrites(shape, so, to, nt): + + grid = Grid(shape=shape) + dt = 0.0001 + + # Define the wavefield with the size of the model and the time dimension + u = TimeFunction(name="u", grid=grid, time_order=to, space_order=so) + + pde = u.dt2 - u.laplace + eq0 = solve(pde, u.forward) + + stencil = Eq(u.forward, eq0) + u.data[:, :, :] = 0 + u.data[:, 40:50, 40:50] = 1 + + # Devito Operator + op = Operator([stencil]) + op.apply(time=nt, dt=dt) + + u.data[:, :, :] = 0 + u.data[:, 40:50, 40:50] = 1 + + # XDSL Operator + xdslop = XDSLOperator([stencil]) + xdslop.apply(time=nt, dt=dt) + + from devito.ir.ietxdsl.lowering import iet_to_standard_mlir + + # Check coverage of unused iet iet_to_standard_mlir + iet_to_standard_mlir(xdslop._module) From bcab46278cc4cad36f53f3e7db294655f0d18e30 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 28 Nov 2023 17:21:39 +0000 Subject: [PATCH 4/6] cleanup: drop more obsolete code --- devito/ir/ietxdsl/cluster_to_ssa.py | 82 ++++++++++++----------------- devito/xdslpasses/__init__.py | 1 - devito/xdslpasses/iet/__init__.py | 1 - devito/xdslpasses/iet/parpragma.py | 57 -------------------- 4 files changed, 35 insertions(+), 106 deletions(-) delete mode 100644 devito/xdslpasses/__init__.py delete mode 100644 devito/xdslpasses/iet/__init__.py delete mode 100644 devito/xdslpasses/iet/parpragma.py diff --git a/devito/ir/ietxdsl/cluster_to_ssa.py b/devito/ir/ietxdsl/cluster_to_ssa.py index a452919a82..d6474dfa60 100644 --- a/devito/ir/ietxdsl/cluster_to_ssa.py +++ b/devito/ir/ietxdsl/cluster_to_ssa.py @@ -1,12 +1,21 @@ # ------------- General imports -------------# from typing import Any +from dataclasses import 
dataclass, field from sympy import Add, Expr, Float, Indexed, Integer, Mod, Mul, Pow, Symbol # ------------- xdsl imports -------------# -from xdsl.dialects import arith, builtin, func, memref, scf, stencil, gpu +from xdsl.dialects import (arith, builtin, func, memref, scf, + stencil, gpu, llvm) from xdsl.dialects.experimental import math from xdsl.ir import Block, Operation, OpResult, Region, SSAValue +from xdsl.pattern_rewriter import ( + GreedyRewritePatternApplier, + PatternRewriter, + PatternRewriteWalker, + RewritePattern, + op_type_rewrite_pattern, +) # ------------- devito imports -------------# from devito import Grid, SteppingDimension @@ -18,7 +27,7 @@ from devito.ir.ietxdsl import iet_ssa from devito.ir.ietxdsl.utils import is_int, is_float from devito.ir.ietxdsl.ietxdsl_functions import dtypes_to_xdsltypes - +from devito.ir.ietxdsl.lowering import LowerIetForToScfFor # flake8: noqa @@ -121,34 +130,28 @@ def _convert_eq(self, eq: LoweredEq): ) def _visit_math_nodes(self, node: Expr) -> SSAValue: + # Handle Indexeds if isinstance(node, Indexed): offsets = _get_dim_offsets(node, self.time_offs) return self.loaded_values[offsets] - if isinstance(node, Integer): + # Handle Integers + elif isinstance(node, Integer): cst = arith.Constant.from_int_and_width(int(node), builtin.i64) self.block.add_op(cst) return cst.result - if isinstance(node, Float): + # Handle Floats + elif isinstance(node, Float): cst = arith.Constant.from_float_and_width(float(node), builtin.f32) self.block.add_op(cst) return cst.result - # if isinstance(math, Constant): - # symb = iet_ssa.LoadSymbolic.get(math.name, dtypes_to_xdsltypes[math.dtype]) - # self.block.add_op(symb) - # return symb.result - if isinstance(node, Symbol): + # Handle Symbols + elif isinstance(node, Symbol): symb = iet_ssa.LoadSymbolic.get(node.name, builtin.f32) self.block.add_op(symb) - return symb.result - - # handle all of the math - if not isinstance(node, (Add, Mul, Pow, Mod)): - raise ValueError(f"Unknown math: 
{node}", node) - - args = [self._visit_math_nodes(arg) for arg in node.args] - - # make sure all args are the same type: - if isinstance(node, (Add, Mul)): + return symb.result + # Handle Add Mul + elif isinstance(node, (Add, Mul)): + args = [self._visit_math_nodes(arg) for arg in node.args] # add casts when necessary # get first element out, store the rest in args # this makes the reduction easier @@ -160,14 +163,14 @@ def _visit_math_nodes(self, node: Expr) -> SSAValue: op_cls = arith.Addf if isinstance(node, Add) else arith.Mulf else: raise("Add support for another type") - for arg in args: op = op_cls(carry, arg) self.block.add_op(op) carry = op.result return carry - - if isinstance(node, Pow): + # Handle Pow + elif isinstance(node, Pow): + args = [self._visit_math_nodes(arg) for arg in node.args] assert len(args) == 2, "can't pow with != 2 args!" base, ex = args if is_int(base): @@ -188,11 +191,12 @@ def _visit_math_nodes(self, node: Expr) -> SSAValue: op = op_cls.get(base, ex) self.block.add_op(op) return op.result + # Handle Mod + elif isinstance(node, Mod): + raise NotImplementedError("Go away, no mod here. >:(") + else: + raise NotImplementedError(f"Unknown math: {node}", node) - if isinstance(node, Mod): - raise ValueError("Go away, no mod here. 
>:(") - - raise ValueError("Unknown math!") def _add_access_ops( self, reads: list[Indexed], time_offset_to_field: dict[int, SSAValue] @@ -257,10 +261,10 @@ def _ensure_same_type(self, *vals: SSAValue): if all(is_float(val) for val in vals): return vals # not everything homogeneous - new_vals = [] + processed = [] for val in vals: if is_float(val): - new_vals.append(val) + processed.append(val) continue # if the val is the result of a arith.constant with no uses, # we change the type of the arith.constant to our desired type @@ -273,13 +277,13 @@ def _ensure_same_type(self, *vals: SSAValue): val.op.attributes["value"] = builtin.FloatAttr( float(val.op.value.value.data), builtin.f32 ) - new_vals.append(val) + processed.append(val) continue # insert an integer to float cast op conv = arith.SIToFPOp(val, builtin.f32) self.block.add_op(conv) - new_vals.append(conv.result) - return new_vals + processed.append(conv.result) + return processed def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple: @@ -305,22 +309,6 @@ def _get_dim_offsets(idx: Indexed, t_offset: int) -> tuple: # #### # -------------------------------------------------------- #### -from dataclasses import dataclass, field - -from xdsl.pattern_rewriter import ( - GreedyRewritePatternApplier, - PatternRewriter, - PatternRewriteWalker, - RewritePattern, - op_type_rewrite_pattern, -) - -from devito.ir.ietxdsl.lowering import ( - LowerIetForToScfFor, -) - -from xdsl.dialects import llvm - @dataclass class WrapFunctionWithTransfers(RewritePattern): func_name: str diff --git a/devito/xdslpasses/__init__.py b/devito/xdslpasses/__init__.py deleted file mode 100644 index 4eb86f53f3..0000000000 --- a/devito/xdslpasses/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .iet import Callable # noqa \ No newline at end of file diff --git a/devito/xdslpasses/iet/__init__.py b/devito/xdslpasses/iet/__init__.py deleted file mode 100644 index feeeddfa40..0000000000 --- a/devito/xdslpasses/iet/__init__.py +++ /dev/null @@ -1 
+0,0 @@ -from .parpragma import Callable # noqa diff --git a/devito/xdslpasses/iet/parpragma.py b/devito/xdslpasses/iet/parpragma.py deleted file mode 100644 index 97f0a519b3..0000000000 --- a/devito/xdslpasses/iet/parpragma.py +++ /dev/null @@ -1,57 +0,0 @@ -from devito.ir.ietxdsl import * -from xdsl.pattern_rewriter import RewritePattern, GreedyRewritePatternApplier, \ - op_type_rewrite_pattern, PatternRewriteWalker, PatternRewriter -import xdsl.dialects.builtin as builtin - - -# NOTE: this is WIP and needs refactoring ;) -@dataclass -class MakeSimdPattern(RewritePattern): - """ - This pattern reproduces the behaviour of PragmaSimdTransformer - """ - - def is_parallel_relaxed(self, iteration: Iteration) -> bool: - return any([ - prop.data - in ["parallel", "parallel_if_private", "parallel_if_private"] - for prop in iteration.properties.data - ]) - - @op_type_rewrite_pattern - def match_and_rewrite(self, iteration: Iteration, - rewriter: PatternRewriter): - - if (not self.is_parallel_relaxed(iteration)): - return - - # check if parent is parallel as well - parent_op = iteration.parent.parent.parent - if (not self.is_parallel_relaxed(parent_op)): - return - - # TODO how to only check for iteration trees? 
- # NOTE: currently only checking the first child - child_ops = iteration.body.blocks[0].ops - - # check if children is parallel as well - if (isinstance(child_ops[0], Iteration) - and self.is_parallel_relaxed(child_ops[0])): - return - - # TODO: insert additional checks - # toreview - # iteration.pragmas.data.append(StringAttr("simd-for")) - - -def construct_walker() -> PatternRewriteWalker: - applier = GreedyRewritePatternApplier([MakeSimdPattern()]) - - return PatternRewriteWalker(applier, - walk_regions_first=False, - apply_recursively=False) - - -def make_simd(ctx, op: builtin.ModuleOp): - walker = construct_walker() - walker.rewrite_module(op) From 40c56d1ecd296546c55435c139d917b725d376d9 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 28 Nov 2023 17:54:16 +0000 Subject: [PATCH 5/6] xdsl_operator: More cleanup --- devito/operator/xdsl_operator.py | 893 +------------------------------ 1 file changed, 1 insertion(+), 892 deletions(-) diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index 4c9e3e036b..7eddd7d30f 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -262,14 +262,6 @@ def setup_memref_args(self): self._jit_kernel_constants.update(args) - @classmethod - def _normalize_kwargs(cls, **kwargs): - return kwargs - - @classmethod - def _check_kwargs(cls, **kwargs): - return - @classmethod def _build(cls, expressions, **kwargs) -> Callable: debug("-Building operator") @@ -295,6 +287,7 @@ def _build(cls, expressions, **kwargs) -> Callable: # Required for the jit-compilation op._compiler = kwargs['compiler'] + op._language = kwargs['language'] op._lib = None op._cfunction = None @@ -323,10 +316,6 @@ def _build(cls, expressions, **kwargs) -> Callable: return op - def __init__(self, *args, **kwargs): - # Bypass the silent call to __init__ triggered through the backends engine - pass - # Compilation -- Expression level @classmethod @@ -368,433 +357,6 @@ def _lower(cls, expressions, 
**kwargs): return IRs(expressions, clusters, stree, uiet, iet), byproduct, module - @classmethod - def _rcompile_wrapper(cls, **kwargs): - def wrapper(expressions, kwargs=kwargs): - return rcompile(expressions, kwargs) - return wrapper - - @classmethod - def _initialize_state(cls, **kwargs): - return {} - - @classmethod - def _specialize_dsl(cls, expressions, **kwargs): - """ - Backend hook for specialization at the DSL level. The input is made of - expressions and other higher order objects such as Injection or - Interpolation; the expressions are still unevaluated at this stage, - meaning that they are still in tensorial form and derivatives aren't - expanded yet. - """ - return expressions - - @classmethod - def _specialize_exprs(cls, expressions, **kwargs): - """ - Backend hook for specialization at the expression level. - """ - return expressions - - @classmethod - @timed_pass(name='lowering.Expressions') - def _lower_exprs(cls, expressions, **kwargs): - """ - Expression lowering: - - * Apply rewrite rules; - * Evaluate derivatives; - * Flatten vectorial equations; - * Indexify Functions; - * Apply substitution rules; - * Shift indices for domain alignment. - """ - expand = kwargs['options'].get('expand', True) - - # Specialization is performed on unevaluated expressions - expressions = cls._specialize_dsl(expressions, **kwargs) - - # Lower FD derivatives - # NOTE: we force expansion of derivatives along SteppingDimensions - # because it drastically simplifies the subsequent lowering into - # ModuloDimensions - if not expand: - expand = lambda d: d.is_Stepping - expressions = flatten([i._evaluate(expand=expand) for i in expressions]) - - # Scalarize the tensor equations, if any - expressions = [j for i in expressions for j in i._flatten] - - # A second round of specialization is performed on evaluated expressions - expressions = cls._specialize_exprs(expressions, **kwargs) - - # "True" lowering (indexification, shifting, ...) 
- expressions = lower_exprs(expressions, **kwargs) - - processed = [LoweredEq(i) for i in expressions] - - return processed - - # Compilation -- Cluster level - - @classmethod - def _specialize_clusters(cls, clusters, **kwargs): - """ - Backend hook for specialization at the Cluster level. - """ - return clusters - - @classmethod - @timed_pass(name='lowering.Clusters') - def _lower_clusters(cls, expressions, profiler=None, **kwargs): - """ - Clusters lowering: - - * Group expressions into Clusters; - * Introduce guards for conditional Clusters; - * Analyze Clusters to detect computational properties such - as parallelism. - * Optimize Clusters for performance - """ - sregistry = kwargs['sregistry'] - - # Build a sequence of Clusters from a sequence of Eqs - clusters = clusterize(expressions, **kwargs) - - # Operation count before specialization - init_ops = sum(estimate_cost(c.exprs) for c in clusters if c.is_dense) - - clusters = cls._specialize_clusters(clusters, **kwargs) - - # Operation count after specialization - final_ops = sum(estimate_cost(c.exprs) for c in clusters if c.is_dense) - try: - profiler.record_ops_variation(init_ops, final_ops) - except AttributeError: - pass - - # Generate implicit Clusters from higher level abstractions - clusters = generate_implicit(clusters, sregistry=sregistry) - - # Lower all remaining high order symbolic objects - clusters = lower_index_derivatives(clusters, **kwargs) - - # Make sure no reconstructions can unpick any of the symbolic - # optimizations performed so far - clusters = unevaluate(clusters) - - return ClusterGroup(clusters) - - # Compilation -- ScheduleTree level - - @classmethod - def _specialize_stree(cls, stree, **kwargs): - """ - DEPRECATED: Backend hook for specialization at the Schedule tree level. 
- """ - return stree - - @classmethod - @timed_pass(name='lowering.ScheduleTree') - def _lower_stree(cls, clusters, **kwargs): - """ - Schedule tree lowering: - - * Turn a sequence of Clusters into a ScheduleTree; - * Derive and attach metadata for distributed-memory parallelism; - * Derive sections for performance profiling - """ - # DEPRECATED: Build a ScheduleTree from a sequence of Clusters - stree = stree_build(clusters, **kwargs) - stree = cls._specialize_stree(stree) - - return stree - - # Compilation -- Iteration/Expression tree level - - @classmethod - def _specialize_iet(cls, graph, **kwargs): - """ - Backend hook for specialization at the Iteration/Expression tree level. - """ - return graph - - @classmethod - @timed_pass(name='lowering.uIET') - def _lower_uiet(cls, stree, profiler=None, **kwargs): - """ - Turn a ScheduleTree into an unbounded Iteration/Expression tree, that is - in essence a "floating" IET where one or more variables may be unbounded - (i.e., no definition placed yet). 
- """ - # Build an unbounded IET from a ScheduleTree - uiet = iet_build(stree) - - # Analyze the IET Sections for C-level profiling - try: - profiler.analyze(uiet) - except AttributeError: - pass - - return uiet - - @classmethod - @timed_pass(name='lowering.IET') - def _lower_iet(cls, uiet, profiler=None, **kwargs): - """ - Iteration/Expression tree lowering: - - * Introduce distributed-memory, shared-memory, and SIMD parallelism; - * Introduce optimizations for data locality; - * Finalize (e.g., symbol definitions, array casts) - """ - name = kwargs.get("name", "Kernel") - sregistry = kwargs['sregistry'] - - # Wrap the IET with an EntryFunction (a special Callable representing - # the entry point of the generated library) - parameters = derive_parameters(uiet, True) - iet = EntryFunction(name, uiet, 'int', parameters, ()) - - # Lower IET to a target-specific IET - graph = Graph(iet, sregistry=sregistry) - graph = cls._specialize_iet(graph, **kwargs) - - # Instrument the IET for C-level profiling - # Note: this is postponed until after _specialize_iet because during - # specialization further Sections may be introduced - cls._Target.instrument(graph, profiler=profiler, **kwargs) - - # Extract the necessary macros from the symbolic objects - generate_macros(graph) - - # Target-independent optimizations - minimize_symbols(graph) - - return graph.root, graph - - # Read-only properties exposed to the outside world - - @cached_property - def reads(self): - return tuple(self._reads) - - @cached_property - def writes(self): - return tuple(self._writes) - - @cached_property - def dimensions(self): - ret = set().union(*[d._defines for d in self._dimensions]) - - # During compilation other Dimensions may have been produced - dimensions = FindSymbols('dimensions').visit(self) - ret.update(d for d in dimensions if d.is_PerfKnob) - - ret = tuple(sorted(ret, key=attrgetter('name'))) - - return ret - - @cached_property - def input(self): - return tuple(i for i in self.parameters 
if i.is_Input) - - @cached_property - def temporaries(self): - return tuple(i for i in self.parameters if i.is_TempFunction) - - @cached_property - def objects(self): - return tuple(i for i in self.parameters if i.is_Object) - - # Arguments processing - - @cached_property - def _access_modes(self): - """ - A table providing the AccessMode of all user-accessible symbols in `self`. - """ - return frozendict({i: AccessMode(i in self.reads, i in self.writes) - for i in self.input}) - - def _prepare_arguments(self, autotune=None, **kwargs): - """ - Process runtime arguments passed to ``.apply()` and derive - default values for any remaining arguments. - """ - # Sanity check -- all user-provided keywords must be known to the Operator - if not configuration['ignore-unknowns']: - for k, v in kwargs.items(): - if k not in self._known_arguments: - raise ValueError("Unrecognized argument %s=%s" % (k, v)) - - # Pre-process Dimension overrides. This may help ruling out ambiguities - # when processing the `defaults` arguments. 
A topological sorting is used - # as DerivedDimensions may depend on their parents - nodes = self.dimensions - edges = [(i, i.parent) for i in self.dimensions - if i.is_Derived and i.parent in set(nodes)] - toposort = DAG(nodes, edges).topological_sort() - - futures = {} - for d in reversed(toposort): - if set(d._arg_names).intersection(kwargs): - futures.update(d._arg_values(self._dspace[d], args={}, **kwargs)) - - overrides, defaults = split(self.input, lambda p: p.name in kwargs) - - # Process data-carrier overrides - args = kwargs['args'] = ReducerMap() - for p in overrides: - args.update(p._arg_values(**kwargs)) - try: - args.reduce_inplace() - except ValueError: - raise ValueError("Override `%s` is incompatible with overrides `%s`" % - (p, [i for i in overrides if i.name in args])) - - # Process data-carrier defaults - for p in defaults: - if p.name in args: - # E.g., SubFunctions - continue - for k, v in p._arg_values(**kwargs).items(): - if k not in args: - args[k] = v - elif k in futures: - # An explicit override is later going to set `args[k]` - pass - elif k in kwargs: - # User is in control - # E.g., given a ConditionalDimension `t_sub` with factor `fact` and - # a TimeFunction `usave(t_sub, x, y)`, an override for `fact` is - # supplied w/o overriding `usave`; that's legal - pass - elif is_integer(args[k]) and not contains_val(args[k], v): - raise ValueError("Default `%s` is incompatible with other args as " - "`%s=%s`, while `%s=%s` is expected. Perhaps you " - "forgot to override `%s`?" % - (p, k, v, k, args[k], p)) - - args = kwargs['args'] = args.reduce_all() - - # DiscreteFunctions may be created from CartesianDiscretizations, which in - # turn could be Grids or SubDomains. 
Both may provide arguments - discretizations = {getattr(kwargs[p.name], 'grid', None) for p in overrides} - discretizations.update({getattr(p, 'grid', None) for p in defaults}) - discretizations.discard(None) - # Remove subgrids if multiple grids - if len(discretizations) > 1: - discretizations = {g for g in discretizations - if not any(d.is_Derived for d in g.dimensions)} - - for i in discretizations: - args.update(i._arg_values(**kwargs)) - - # There can only be one Grid from which DiscreteFunctions were created - grids = {i for i in discretizations if isinstance(i, Grid)} - if len(grids) > 1: - # We loosely tolerate multiple Grids for backwards compatibility - # with spacial subsampling, which should be revisited however. And - # With MPI it would definitely break! - if configuration['mpi']: - raise ValueError("Multiple Grids found") - try: - grid = grids.pop() - except KeyError: - grid = None - - # An ArgumentsMap carries additional metadata that may be used by - # the subsequent phases of the arguments processing - args = kwargs['args'] = ArgumentsMap(args, grid, self) - - # Process Dimensions - for d in reversed(toposort): - args.update(d._arg_values(self._dspace[d], grid, **kwargs)) - - # Process Objects - for o in self.objects: - args.update(o._arg_values(grid=grid, **kwargs)) - - # In some "lower-level" Operators implementing a random piece of C, such as - # one or more calls to third-party library functions, there could still be - # at this point unprocessed arguments (e.g., scalars) - kwargs.pop('args') - args.update({k: v for k, v in kwargs.items() if k not in args}) - - # Sanity check - for p in self.parameters: - p._arg_check(args, self._dspace[p], am=self._access_modes.get(p)) - for d in self.dimensions: - if d.is_Derived: - d._arg_check(args, self._dspace[p]) - - # Turn arguments into a format suitable for the generated code - # E.g., instead of NumPy arrays for Functions, the generated code expects - # pointers to ctypes.Struct - for p in 
self.parameters: - try: - args.update(kwargs.get(p.name, p)._arg_finalize(args, alias=p)) - except AttributeError: - # User-provided floats/ndarray obviously do not have `_arg_finalize` - args.update(p._arg_finalize(args, alias=p)) - - # Execute autotuning and adjust arguments accordingly - args.update(self._autotune(args, autotune or configuration['autotuning'])) - - return args - - def _postprocess_arguments(self, args, **kwargs): - """Process runtime arguments upon returning from ``.apply()``.""" - for p in self.parameters: - try: - subfuncs = (args[getattr(p, s).name] for s in p._sub_functions) - p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name)) - except AttributeError: - p._arg_apply(args[p.name], alias=kwargs.get(p.name)) - - @cached_property - def _known_arguments(self): - """The arguments that can be passed to ``apply`` when running the Operator.""" - ret = set() - for i in self.input: - ret.update(i._arg_names) - try: - ret.update(i.grid._arg_names) - except AttributeError: - pass - for d in self.dimensions: - ret.update(d._arg_names) - ret.update(p.name for p in self.parameters) - return frozenset(ret) - - def _autotune(self, args, setup): - """Auto-tuning to improve runtime performance.""" - return args - - def arguments(self, **kwargs): - """Arguments to run the Operator.""" - args = self._prepare_arguments(**kwargs) - # Check all arguments are present - for p in self.parameters: - if args.get(p.name) is None: - raise ValueError("No value found for parameter %s" % p.name) - return args - - # Code generation and JIT compilation - - # @cached_property - # def _soname(self): - # """A unique name for the shared object resulting from JIT compilation.""" - # return Signer._digest(self, configuration) - - @cached_property - def ccode(self): - try: - return self._ccode_handler(compiler=self._compiler).visit(self) - except (AttributeError, TypeError): - from devito.ir.iet.visitors import CGen - return CGen(compiler=self._compiler).visit(self) 
@property def cfunction(self): @@ -814,134 +376,6 @@ def cfunction(self): return self._cfunction - def cinterface(self, force=False): - """ - Generate two files under the prescribed temporary directory: - - * `X.c` (or `X.cpp`): the code generated for this Operator; - * `X.h`: an header file representing the interface of `X.c`. - - Where `X=self.name`. - - Parameters - ---------- - force : bool, optional - Overwrite any existing files. Defaults to False. - """ - dest = self._compiler.get_jit_dir() - name = dest.joinpath(self.name) - - cfile = name.with_suffix(".%s" % self._compiler.src_ext) - hfile = name.with_suffix('.h') - - # Generate the .c and .h code - ccode, hcode = CInterface().visit(self) - - for f, code in [(cfile, ccode), (hfile, hcode)]: - if not force and f.is_file(): - debug("`%s` was not saved in `%s` as it already exists" % (f.name, dest)) - else: - with open(str(f), 'w') as ff: - ff.write(str(code)) - debug("`%s` successfully saved in `%s`" % (f.name, dest)) - - return ccode, hcode - - # Execution - - def __call__(self, **kwargs): - return self.apply(**kwargs) - - def apply(self, **kwargs): - """ - Execute the Operator. - - With no arguments provided, the Operator runs using the data carried by the - objects appearing in the input expressions -- these are referred to as the - "default arguments". - - Optionally, any of the Operator default arguments may be replaced by passing - suitable key-value arguments. Given ``apply(k=v, ...)``, ``(k, v)`` may be - used to: - - * replace a Constant. In this case, ``k`` is the name of the Constant, - ``v`` is either a Constant or a scalar value. - - * replace a Function (SparseFunction). Here, ``k`` is the name of the - Function, ``v`` is either a Function or a numpy.ndarray. - - * alter the iteration interval along a Dimension. Consider a generic - Dimension ``d`` iterated over by the Operator. 
By default, the Operator - runs over all iterations within the compact interval ``[d_m, d_M]``, - where ``d_m`` and ``d_M`` are, respectively, the smallest and largest - integers not causing out-of-bounds memory accesses (for the Grid - Dimensions, this typically implies iterating over the entire physical - domain). So now ``k`` can be either ``d_m`` or ``d_M``, while ``v`` - is an integer value. - - Examples - -------- - Consider the following Operator - - >>> from devito import Eq, Grid, TimeFunction, Operator - >>> grid = Grid(shape=(3, 3)) - >>> u = TimeFunction(name='u', grid=grid, save=3) - >>> op = Operator(Eq(u.forward, u + 1)) - - The Operator is run by calling ``apply`` - - >>> summary = op.apply() - - The variable ``summary`` contains information about runtime performance. - As no key-value parameters are specified, the Operator runs with its - default arguments, namely ``u=u, x_m=0, x_M=2, y_m=0, y_M=2, time_m=0, - time_M=1``. - - At this point, the same Operator can be used for a completely different - run, for example - - >>> u2 = TimeFunction(name='u', grid=grid, save=5) - >>> summary = op.apply(u=u2, x_m=1, y_M=1) - - Now, the Operator will run with a different set of arguments, namely - ``u=u2, x_m=1, x_M=2, y_m=0, y_M=1, time_m=0, time_M=3``. - - To run an Operator that only uses buffered TimeFunctions, the maximum - iteration point along the time dimension must be explicitly specified - (otherwise, the Operator wouldn't know how many iterations to run). 
- - >>> u3 = TimeFunction(name='u', grid=grid) - >>> op = Operator(Eq(u3.forward, u3 + 1)) - >>> summary = op.apply(time_M=10) - """ - # Build the arguments list to invoke the kernel function - with self._profiler.timer_on('arguments'): - args = self.arguments(**kwargs) - self._jit_kernel_constants = args - - cfunction = self.cfunction - try: - # Invoke kernel function with args - arg_values = self._construct_cfunction_args(args) - with self._profiler.timer_on('apply', comm=args.comm): - cfunction(*arg_values) - except ctypes.ArgumentError as e: - if e.args[0].startswith("argument "): - argnum = int(e.args[0][9:].split(':')[0]) - 1 - newmsg = "error in argument '%s' with value '%s': %s" % ( - self.parameters[argnum].name, - arg_values[argnum], - e.args[0]) - raise ctypes.ArgumentError(newmsg) from e - else: - raise - - # Post-process runtime arguments - self._postprocess_arguments(args, **kwargs) - - # Output summary of performance achieved - return self._emit_apply_profiling(args) - def _construct_cfunction_args(self, args, get_types=False): """ Either construct the args for the cfunction, or construct the @@ -967,331 +401,6 @@ def _construct_cfunction_args(self, args, get_types=False): else: return things - def _emit_apply_profiling(self, args): - """Produce a performance summary of the profiled sections.""" - # Rounder to 2 decimal places - fround = lambda i: ceil(i * 100) / 100 - - elapsed = fround(self._profiler.py_timers['apply']) - info("Operator `%s` ran in %.2f s" % (self.name, elapsed)) - - summary = self._profiler.summary(args, self._dtype, reduce_over=elapsed) - - if not is_log_enabled_for('PERF'): - # Do not waste time - return summary - - if summary.globals: - # Note that with MPI enabled, the global performance indicators - # represent "cross-rank" performance data - metrics = [] - - v = summary.globals.get('vanilla') - if v is not None: - metrics.append("OI=%.2f" % fround(v.oi)) - metrics.append("%.2f GFlops/s" % fround(v.gflopss)) - - v = 
summary.globals.get('fdlike') - if v is not None: - metrics.append("%.2f GPts/s" % fround(v.gpointss)) - - if metrics: - perf("Global performance: [%s]" % ', '.join(metrics)) - - perf("Local performance:") - indent = " "*2 - else: - indent = "" - - if isinstance(self._profiler, AdvancedProfilerVerbose): - metrics = [] - - v = summary.globals.get('fdlike-nosetup') - if v is not None: - metrics.append("%.2f GPts/s" % fround(v.gpointss)) - - if metrics: - perf("Global performance : [%s]" % ', '.join(metrics)) - - # Emit local, i.e. "per-rank" performance. Without MPI, this is the only - # thing that will be emitted - def lower_perfentry(v): - if v.gflopss: - oi = "OI=%.2f" % fround(v.oi) - gflopss = "%.2f GFlops/s" % fround(v.gflopss) - gpointss = "%.2f GPts/s" % fround(v.gpointss) - return "[%s]" % ", ".join([oi, gflopss, gpointss]) - elif v.gpointss: - gpointss = "%.2f GPts/s" % fround(v.gpointss) - return "[%s]" % gpointss - else: - return "" - - for k, v in summary.items(): - rank = "[rank%d]" % k.rank if k.rank is not None else "" - - metrics = lower_perfentry(v) - - itershapes = [",".join(str(i) for i in its) for its in v.itershapes] - if len(itershapes) > 1: - itershapes = ",".join("<%s>" % i for i in itershapes) - elif len(itershapes) == 1: - itershapes = itershapes[0] - else: - itershapes = "" - name = "%s%s<%s>" % (k.name, rank, itershapes) - - perf("%s* %s ran in %.2f s %s" % (indent, name, fround(v.time), metrics)) - for n, v1 in summary.subsections.get(k.name, {}).items(): - metrics = lower_perfentry(v1) - - perf("%s+ %s ran in %.2f s [%.2f%%] %s" % - (indent*2, n, fround(v1.time), fround(v1.time/v.time*100), - metrics)) - - # Emit performance mode and arguments - perf_args = {} - for i in self.input + self.dimensions: - if not i.is_PerfKnob: - continue - try: - perf_args[i.name] = args[i.name] - except KeyError: - # Try with the aliases - for a in i._arg_names: - if a in args: - perf_args[a] = args[a] - break - perf("Performance[mode=%s] arguments: %s" % 
(self._mode, perf_args)) - - return summary - - # Pickling support - - def __getstate__(self): - if self._lib: - state = dict(self.__dict__) - # The compiled shared-object will be pickled; upon unpickling, it - # will be restored into a potentially different temporary directory, - # so the entire process during which the shared-object is loaded and - # given to ctypes must be performed again - state['_lib'] = None - state['_cfunction'] = None - # Do not pickle the `args` used to construct the Operator. Not only - # would this be completely useless, but it might also lead to - # allocating additional memory upon unpickling, as the user-provided - # equations typically carry different instances of the same Function - # (e.g., f(t, x-1), f(t, x), f(t, x+1)), which are different objects - # with distinct `.data` fields - state['_args'] = None - with open(self._lib._name, 'rb') as f: - state['binary'] = f.read() - state['soname'] = self._soname - return state - else: - return self.__dict__ - - def __getnewargs_ex__(self): - return (None,), {} - - def __setstate__(self, state): - soname = state.pop('soname', None) - binary = state.pop('binary', None) - for k, v in state.items(): - setattr(self, k, v) - if soname is not None: - self._compiler.save(soname, binary) - self._lib = self._compiler.load(soname) - self._lib.name = soname - - -# Default action (perform or bypass) for selected compilation passes upon -# recursive compilation -# NOTE: it may not only be pointless to apply the following passes recursively -# (because once, during the main compilation phase, is simply enough), but also -# dangerous as some of them (the minority) might break in some circumstances -# if applied in cascade (e.g., `linearization` on top of `linearization`) -rcompile_registry = { - 'mpi': False, - 'linearize': False, - 'place-transfers': False -} - - -def rcompile(expressions, kwargs=None): - """ - Perform recursive compilation on an ordered sequence of symbolic expressions. 
- """ - if not kwargs or 'options' not in kwargs: - kwargs = parse_kwargs(**kwargs) - cls = operator_selector(**kwargs) - kwargs = cls._normalize_kwargs(**kwargs) - else: - cls = operator_selector(**kwargs) - - # Tweak the compilation kwargs - options = dict(kwargs['options']) - options.update(rcompile_registry) - kwargs['options'] = options - - # Recursive profiling not supported -- would be a complete mess - kwargs.pop('profiler', None) - - return cls._lower(expressions, **kwargs) - - -# Misc helpers - -class ArgumentsMap(dict): - - def __init__(self, args, grid, op): - super().__init__(args) - - self.grid = grid - - self.allocator = op._allocator - self.platform = op._platform - # self.language = op._language - self.compiler = op._compiler - self.options = op._options - - @property - def comm(self): - """The MPI communicator the arguments are collective over.""" - return self.grid.comm if self.grid is not None else MPI.COMM_NULL - - @property - def opkwargs(self): - temp_registry = {v: k for k, v in platform_registry.items()} - platform = temp_registry[self.platform] - - temp_registry = {v: k for k, v in compiler_registry.items()} - compiler = temp_registry[self.compiler.__class__] - - return {'platform': platform, 'compiler': compiler, 'language': self.language} - - -def parse_kwargs(**kwargs): - """ - Parse keyword arguments provided to an Operator. - """ - # `dse` -- deprecated, dropped - dse = kwargs.pop("dse", None) - if dse is not None: - warning("The `dse` argument is deprecated. " - "The optimization level is now controlled via the `opt` argument") - - # `dle` -- deprecated, replaced by `opt` - if 'dle' in kwargs: - warning("The `dle` argument is deprecated. 
" - "The optimization level is now controlled via the `opt` argument") - dle = kwargs.pop('dle') - if 'opt' in kwargs: - warning("Both `dle` and `opt` were passed; ignoring `dle` argument") - opt = kwargs.pop('opt') - else: - warning("Setting `opt=%s`" % str(dle)) - opt = dle - elif 'opt' in kwargs: - opt = kwargs.pop('opt') - else: - opt = configuration['opt'] - - if not opt or isinstance(opt, str): - mode, options = opt, {} - elif isinstance(opt, tuple): - if len(opt) == 0: - mode, options = 'noop', {} - elif isinstance(opt[-1], dict): - if len(opt) == 2: - mode, options = opt - else: - mode, options = tuple(flatten(i.split(',') for i in opt[:-1])), opt[-1] - else: - mode, options = tuple(flatten(i.split(',') for i in opt)), {} - else: - raise InvalidOperator("Illegal `opt=%s`" % str(opt)) - - # `opt`, deprecated kwargs - kwopenmp = kwargs.get('openmp', options.get('openmp')) - if kwopenmp is None: - openmp = kwargs.get('language', configuration['language']) == 'openmp' - else: - openmp = kwopenmp - - # `opt`, options - options = dict(options) - options.setdefault('openmp', openmp) - options.setdefault('mpi', configuration['mpi']) - for k, v in configuration['opt-options'].items(): - options.setdefault(k, v) - # Handle deprecations - deprecated_options = ('cire-mincost-inv', 'cire-mincost-sops', 'cire-maxalias') - for i in deprecated_options: - try: - options.pop(i) - warning("Ignoring deprecated optimization option `%s`" % i) - except KeyError: - pass - kwargs['options'] = options - - # `opt`, mode - if mode is None: - mode = 'noop' - kwargs['mode'] = mode - - # `platform` - platform = kwargs.get('platform') - if platform is not None: - if not isinstance(platform, str): - raise ValueError("Argument `platform` should be a `str`") - if platform not in configuration._accepted['platform']: - raise InvalidOperator("Illegal `platform=%s`" % str(platform)) - kwargs['platform'] = platform_registry[platform]() - else: - kwargs['platform'] = configuration['platform'] - - 
# `language` - language = kwargs.get('language') - if language is not None: - if not isinstance(language, str): - raise ValueError("Argument `language` should be a `str`") - if language not in configuration._accepted['language']: - raise InvalidOperator("Illegal `language=%s`" % str(language)) - kwargs['language'] = language - elif kwopenmp is not None: - # Handle deprecated `openmp` kwarg for backward compatibility - kwargs['language'] = 'openmp' if openmp else 'C' - else: - kwargs['language'] = configuration['language'] - - # `compiler` - compiler = kwargs.get('compiler') - if compiler is not None: - if not isinstance(compiler, str): - raise ValueError("Argument `compiler` should be a `str`") - if compiler not in configuration._accepted['compiler']: - raise InvalidOperator("Illegal `compiler=%s`" % str(compiler)) - kwargs['compiler'] = compiler_registry[compiler](platform=kwargs['platform'], - language=kwargs['language'], - mpi=configuration['mpi']) - elif any([platform, language]): - kwargs['compiler'] =\ - configuration['compiler'].__new_with__(platform=kwargs['platform'], - language=kwargs['language'], - mpi=configuration['mpi']) - else: - kwargs['compiler'] = configuration['compiler'].__new_with__() - - # `allocator` - kwargs['allocator'] = default_allocator( - '%s.%s.%s' % (kwargs['compiler'].name, - kwargs['language'], - kwargs['platform']) - ) - - return kwargs - def get_arg_names_from_module(op): return [ From 97faa0762bc6d2d387b0d13ecc8dcbf437ed1d0b Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 28 Nov 2023 18:12:19 +0000 Subject: [PATCH 6/6] cleanup: drop more obsolete code --- devito/operator/xdsl_operator.py | 69 ++++++++++++-------------------- 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/devito/operator/xdsl_operator.py b/devito/operator/xdsl_operator.py index 7eddd7d30f..7166eac3c0 100644 --- a/devito/operator/xdsl_operator.py +++ b/devito/operator/xdsl_operator.py @@ -1,41 +1,25 @@ import os import subprocess -import 
ctypes import tempfile -from math import ceil from collections import OrderedDict from io import StringIO -from operator import attrgetter -from cached_property import cached_property from devito import Operator -from devito.arch import compiler_registry, platform_registry -from devito.data import default_allocator from devito.exceptions import InvalidOperator -from devito.ir.clusters import ClusterGroup, clusterize -from devito.ir.equations import LoweredEq, lower_exprs -from devito.ir.iet import (Callable, CInterface, EntryFunction, FindSymbols, MetaCall, - derive_parameters, iet_build) + +from devito.ir.iet import Callable, MetaCall from devito.ir.ietxdsl import (finalize_module_with_globals) -from devito.ir.stree import stree_build -from devito.ir.support import AccessMode, SymbolRegistry +from devito.ir.support import SymbolRegistry from devito.ir.ietxdsl.cluster_to_ssa import (ExtractDevitoStencilConversion, convert_devito_stencil_to_xdsl_stencil) -from devito.logger import debug, info, perf, warning, is_log_enabled_for +from devito.logger import debug, info, perf from devito.operator.operator import IRs -from devito.operator.profiling import AdvancedProfilerVerbose, create_profile -from devito.operator.registry import operator_selector -from devito.parameters import configuration -from devito.passes import (Graph, lower_index_derivatives, generate_implicit, - generate_macros, minimize_symbols, unevaluate) +from devito.operator.profiling import create_profile from devito.passes.iet import CTarget -from devito.symbolics import estimate_cost -from devito.tools import (DAG, ReducerMap, as_tuple, flatten, - filter_sorted, frozendict, is_integer, split, timed_pass, - contains_val) -from devito.types import Evaluable, TimeFunction, Grid +from devito.tools import as_tuple, flatten, filter_sorted +from devito.types import Evaluable, TimeFunction from devito.types.mlir_types import ptr_of, f32 from devito.mpi import MPI @@ -48,25 +32,6 @@ __all__ = ['XDSLOperator'] -# 
small interop shim script for stuff that we don't want to implement in mlir-ir -_INTEROP_C = """ -#include - -double timer_start() { - // return a number representing the current point in time - // it might be offset by a fixed ammount - struct timespec t; - clock_gettime(CLOCK_MONOTONIC, &t); - return (t.tv_sec) + (t.tv_nsec * 1e-9); -} - -double timer_end(double start) { - // return time elaspes since start in seconds - return (timer_start() - start); -} -""" - - class XDSLOperator(Operator): _Target = CTarget @@ -357,7 +322,6 @@ def _lower(cls, expressions, **kwargs): return IRs(expressions, clusters, stree, uiet, iet), byproduct, module - @property def cfunction(self): """The JIT-compiled C function as a ctypes.FuncPtr object.""" @@ -406,3 +370,22 @@ def get_arg_names_from_module(op): return [ str_attr.data for str_attr in op.body.block.ops.first.attributes['param_names'].data # noqa ] + + +# small interop shim script for stuff that we don't want to implement in mlir-ir +_INTEROP_C = """ +#include + +double timer_start() { + // return a number representing the current point in time + // it might be offset by a fixed ammount + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + return (t.tv_sec) + (t.tv_nsec * 1e-9); +} + +double timer_end(double start) { + // return time elaspes since start in seconds + return (timer_start() - start); +} +"""