Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mpi: Add custom topology from devito codebase #20

Merged
merged 6 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion devito/ir/ietxdsl/cluster_to_ssa.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _convert_eq(self, eq: LoweredEq):
), f"can only write to offset [0,0,0], given {offsets[1:]}"

self.block.add_op(stencil.ReturnOp.get([rhs_result]))
outermost_block.add_op(func.Return.get())
outermost_block.add_op(func.Return())

return func.FuncOp.from_region(
"apply_kernel", [], [], Region([outermost_block])
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/ietxdsl/xdsl_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _op_to_func(op: Operator):
ietxdsl_functions.myVisit(i, block=block, ssa_vals=ssa_val_dict)

# add a trailing return
block.add_op(func.Return.get())
block.add_op(func.Return())

func_op = func.FuncOp.from_region(str(op.name), arg_types, [], Region([block]))

Expand Down
114 changes: 111 additions & 3 deletions devito/mpi/distributed.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from abc import ABC, abstractmethod
from ctypes import c_int, c_void_p, sizeof
from itertools import groupby, product
from math import ceil
from abc import ABC, abstractmethod
from math import ceil, pow
from sympy import factorint

import atexit

from cached_property import cached_property
Expand Down Expand Up @@ -194,6 +196,7 @@ def __init__(self, shape, dimensions, input_comm=None, topology=None):
# mpi4py takes care of that when the object gets out of scope
self._input_comm = (input_comm or MPI.COMM_WORLD).Clone()

topology = ('*', '*', 1)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This "hack" will always decompose x and y in a 3d problem

if topology is None:
# `MPI.Compute_dims` sets the dimension sizes to be as close to each other
# as possible, using an appropriate divisibility algorithm. Thus, in 3D:
Expand All @@ -204,6 +207,9 @@ def __init__(self, shape, dimensions, input_comm=None, topology=None):
# guarantee that 9 ranks are arranged into a 3x3 grid when shape=(9, 9))
self._topology = compute_dims(self._input_comm.size, len(shape))
else:
# A custom topology may contain integers or the wildcard '*'
topology = CustomTopology(topology, self._input_comm)

self._topology = topology

if self._input_comm is not input_comm:
Expand Down Expand Up @@ -253,9 +259,18 @@ def nprocs(self):
def topology(self):
return self._topology

@property
def topology_logical(self):
if isinstance(self.topology, CustomTopology):
return self.topology.logical
else:
return None

@cached_property
def is_boundary_rank(self):
""" MPI rank interfaces with the boundary of the domain. """
"""
MPI rank interfaces with the boundary of the domain.
"""
return any([True if i == 0 or i == j-1 else False for i, j in
zip(self.mycoords, self.topology)])

Expand Down Expand Up @@ -550,6 +565,99 @@ def _arg_values(self, *args, **kwargs):
return self._arg_defaults()


class CustomTopology(tuple):

"""
The CustomTopology class provides a mechanism to describe parametric domain
decompositions. It allows users to specify how the dimensions of a domain are
decomposed into chunks based on certain parameters.

Examples
--------
For example, let's consider a domain with three distributed dimensions: x, y, and z,
and an MPI communicator with N processes. Here are a few examples of CustomTopology:

With N known, say N=4:
* `(1, 1, 4)`: the z Dimension is decomposed into 4 chunks
* `(2, 1, 2)`: the x Dimension is decomposed into 2 chunks and the z Dimension
is decomposed into 2 chunks

With N unknown:
* `(1, '*', 1)`: the wildcard `'*'` indicates that the runtime should decompose the y
Dimension into N chunks
* `('*', '*', 1)`: the wildcard `'*'` indicates that the runtime should decompose both
the x and y Dimensions in `nstars` factors of N, prioritizing
the outermost dimension

Assuming that the number of ranks `N` cannot evenly be decomposed to the requested
stars=6 we decompose as evenly as possible by prioritising the outermost dimension

For N=3
* `('*', '*', 1)` gives: (3, 1, 1)
* `('*', 1, '*')` gives: (3, 1, 1)
* `(1, '*', '*')` gives: (1, 3, 1)

For N=6
* `('*', '*', 1)` gives: (3, 2, 1)
* `('*', 1, '*')` gives: (3, 1, 2)
* `(1, '*', '*')` gives: (1, 3, 2)

For N=8
* `('*', '*', '*')` gives: (2, 2, 2)
* `('*', '*', 1)` gives: (4, 2, 1)
* `('*', 1, '*')` gives: (4, 1, 2)
* `(1, '*', '*')` gives: (1, 4, 2)

Notes
-----
Users should not directly use the CustomTopology class. It is instantiated
by the Devito runtime based on user input.
"""

def __new__(cls, items, input_comm):
# Keep track of nstars and already defined decompositions
nstars = items.count('*')

# If no stars exist we are ready
if nstars == 0:
processed = items
else:
# Init decomposition list and track star positions
processed = [1] * len(items)
star_pos = []
for i, item in enumerate(items):
if isinstance(item, int):
processed[i] = item
else:
star_pos.append(i)

# Compute the remaining procs to be allocated
alloc_procs = np.prod([i for i in items if i != '*'])
rem_procs = int(input_comm.size // alloc_procs)

# List of all factors of rem_procs in decreasing order
factors = factorint(rem_procs)
vals = [k for (k, v) in factors.items() for _ in range(v)][::-1]

# Split in number of stars
split = np.array_split(vals, nstars)

# Reduce
star_vals = [int(np.prod(s)) for s in split]

# Apply computed star values to the processed
for index, value in zip(star_pos, star_vals):
processed[index] = value

# Final check that topology matches the communicator size
assert np.prod(processed) == input_comm.size

obj = super().__new__(cls, processed)
obj.logical = items

return obj


def compute_dims(nprocs, ndim):
# We don't do anything clever here. In fact, we do something very basic --
# we just try to distribute `nprocs` evenly over the number of dimensions,
Expand Down
21 changes: 15 additions & 6 deletions devito/operator/xdsl_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ def _jit_compile(self):
with open(backdoor, 'r') as f:
module_str = f.read()

source_name = os.path.splitext(self._tf.name)[0] + ".mlir"
source_file = open(source_name, "w")
source_file.write(module_str)
source_file.close()

# compile IR using xdsl-opt | mlir-opt | mlir-translate | clang
try:
cflags = CFLAGS
Expand All @@ -151,17 +156,21 @@ def _jit_compile(self):
if is_gpu:
cflags += " -lmlir_cuda_runtime "

cmd = f'xdsl-opt -p {xdsl_pipeline} |' \
# TODO More detailed error handling manually,
# instead of relying on a bash-only feature.
cmd = 'set -eo pipefail; '\
f'xdsl-opt {source_name} -p {xdsl_pipeline} |' \
f'mlir-opt -p {mlir_pipeline} | ' \
f'mlir-translate --mlir-to-llvmir | ' \
f'{cc} {cflags} -shared -o {self._tf.name} {self._interop_tf.name} -xir -'

print(cmd)
res = subprocess.run(
cmd,
shell=True,
input=module_str,
text=True,
capture_output=True,
executable="/bin/bash"
)

if res.returncode != 0:
Expand All @@ -172,13 +181,13 @@ def _jit_compile(self):
except Exception as ex:
print("error")
raise ex
#print(res.stderr)
# print(res.stderr)

elapsed = self._profiler.py_timers['jit-compile']

perf("XDSLOperator `%s` jit-compiled `%s` in %.2f s with `mlir-opt`" %
(self.name, self._tf.name, elapsed))
(self.name, source_name, elapsed))

@property
def _soname(self):
return self._tf.name
Expand Down
28 changes: 9 additions & 19 deletions fast/diffusion_2D_wBCs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,9 @@
u = TimeFunction(name='u', grid=grid, space_order=so)

# Reset our data field and ICs
# u.data[:, :, :] = 0.1
init_hat(field=u.data[0], dx=dx, dy=dy, value=1.)


# u.data[0, :, int(ny/2)] = 2

a = Constant(name='a')
# Create an equation with second-order derivatives
eq = Eq(u.dt, a * u.laplace, subdomain=grid.interior)
Expand All @@ -58,23 +55,9 @@
x, y = grid.dimensions
t = grid.stepping_dim

# Add boundary conditions
# bc = [Eq(u[t+1, x, y, 0], 2.)] # bottom
# bc += [Eq(u[t+1, x, y, nz-1], 2.)] # top
# bc += [Eq(u[t+1, 0, y, z], 2.)] # left
# bc += [Eq(u[t+1, nx-1, y, z], 2.)] # right

# bc += [Eq(u[t+1, x, 0, z], 2.)] # front
# bc += [Eq(u[t+1, x, ny-1, z], 2.)] # back

print(eq_stencil)

# Create an operator that updates the forward stencil point
# plus adding boundary conditions
# op = Operator([eq_stencil] + bc, subdomain=grid.interior)

# No BCs
# op = XDSLOperator([eq_stencil])
op = Operator([eq_stencil])
# print(op.ccode)
# dt = 0.00002
Expand All @@ -83,8 +66,15 @@
print(dt)
op.apply(time=nt, dt=dt, a=nu)

print("Field norm is:", norm(u))
print("Devito Field norm is:", norm(u))

plot_image(u.data[0], cmap="seismic")

# import pdb;pdb.set_trace()

# Reset our data field and ICs
init_hat(field=u.data[0], dx=dx, dy=dy, value=1.)
xdslop = XDSLOperator([eq_stencil])
xdslop.apply(time=nt, dt=dt, a=nu)
plot_image(u.data[0], cmap="seismic")

print("xDSL Field norm is:", norm(u))
32 changes: 14 additions & 18 deletions fast/diffusion_3D_wBCs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def plot_3dfunc(u):

cmap = plt.cm.get_cmap("viridis")
values = u.data[0, :, :, :]
vistagrid = pv.UniformGrid()
vistagrid = pv.ImageData()
vistagrid.dimensions = np.array(values.shape) + 1
vistagrid.spacing = (1, 1, 1)
vistagrid.origin = (0, 0, 0) # The bottom left corner of the data set
Expand Down Expand Up @@ -73,31 +73,27 @@ def plot_3dfunc(u):
x, y, z = grid.dimensions
t = grid.stepping_dim

# Add boundary conditions
# bc = [Eq(u[t+1, x, y, 0], 2.)] # bottom
# bc += [Eq(u[t+1, x, y, nz-1], 2.)] # top
# bc += [Eq(u[t+1, 0, y, z], 2.)] # left
# bc += [Eq(u[t+1, nx-1, y, z], 2.)] # right

# bc += [Eq(u[t+1, x, 0, z], 2.)] # front
# bc += [Eq(u[t+1, x, ny-1, z], 2.)] # back

print(eq_stencil)

# Create an operator that updates the forward stencil point
# plus adding boundary conditions
# op = Operator([eq_stencil] + bc, subdomain=grid.interior)

# No BCs
# op = XDSLOperator([eq_stencil])
op = Operator([eq_stencil])
# print(op.ccode)

# Apply the operator for a number of timesteps
op.apply(time=nt, dt=dt, a=nu)

if args.plot:
plot_3dfunc(u)

print("Field norm is:", norm(u))
# import pdb;pdb.set_trace()
print("Devito Field norm is:", norm(u))

# Reset data for XDSL
u.data[:, :, :, :] = 0
u.data[:, :, :, int(nz/2)] = 1

xdslop = XDSLOperator([eq_stencil])
xdslop.apply(time=nt, dt=dt, a=nu)

if args.plot:
plot_3dfunc(u)

print("xDSL Field norm is:", norm(u))
Loading
Loading