GPU Support #574

Open · wants to merge 10 commits into master
66 changes: 66 additions & 0 deletions .github/workflows/ci.yml
@@ -104,3 +104,69 @@ jobs:
jekyll: false
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

test_opencl:
runs-on: ubuntu-latest
env:
CC: mpicc
PETSC_DIR: ${{ github.workspace }}/petsc
PETSC_ARCH: default
PETSC_CONFIGURE_OPTIONS: --with-debugging=1 --with-shared-libraries=1 --with-c2html=0 --with-fortran-bindings=0 --with-opencl=1 --download-viennacl --with-viennacl=1

steps:
- name: Install system dependencies
shell: bash
run: |
sudo apt update
sudo apt install build-essential mpich libmpich-dev \
libblas-dev liblapack-dev gfortran
sudo apt install ocl-icd-opencl-dev pocl-opencl-icd

- name: Set correct Python version
uses: actions/setup-python@v2
with:
python-version: '3.10'

- name: Clone PETSc
uses: actions/checkout@v2
with:
repository: firedrakeproject/petsc
path: ${{ env.PETSC_DIR }}

- name: Build and install PETSc
shell: bash
working-directory: ${{ env.PETSC_DIR }}
run: |
./configure ${PETSC_CONFIGURE_OPTIONS}
make

- name: Build and install petsc4py
shell: bash
working-directory: ${{ env.PETSC_DIR }}/src/binding/petsc4py
run: |
python -m pip install --upgrade cython numpy
python -m pip install --no-deps .

- name: Checkout PyOP2
uses: actions/checkout@v2
with:
path: PyOP2

- name: Install PyOP2
shell: bash
working-directory: PyOP2
run: |
python -m pip install pip==20.2 # pip 20.2 needed for loopy install to work.

# xargs is used to force installation of requirements in the order we specified.
xargs -l1 python -m pip install < requirements-ext.txt
xargs -l1 python -m pip install < requirements-git.txt
python -m pip install pulp
python -m pip install -U flake8
python -m pip install pyopencl
python -m pip install .

- name: Run tests
shell: bash
working-directory: PyOP2
run: pytest test -v --tb=native
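
The pocl ICD installed above provides a CPU-only OpenCL implementation, so this job exercises the OpenCL code path without a physical GPU. A minimal sketch (not part of this diff) for confirming that pyopencl can see the runtime:

import pyopencl as cl

# List every OpenCL platform/device the ICD loader exposes;
# with pocl installed this should include a CPU device.
for platform in cl.get_platforms():
    for device in platform.get_devices():
        print(platform.name, "->", device.name)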
44 changes: 44 additions & 0 deletions pyop2/backends/__init__.py
@@ -0,0 +1,44 @@
class _not_implemented:  # noqa
    """Sentinel for backend features that are not implemented."""


class AbstractComputeBackend:
    """
    Abstract class recording the backend-specific implementations of
    :mod:`pyop2`'s data structures.
    """
GlobalKernel = _not_implemented
Parloop = _not_implemented
Set = _not_implemented
ExtrudedSet = _not_implemented
MixedSet = _not_implemented
Subset = _not_implemented
DataSet = _not_implemented
MixedDataSet = _not_implemented
Map = _not_implemented
MixedMap = _not_implemented
Dat = _not_implemented
MixedDat = _not_implemented
DatView = _not_implemented
Mat = _not_implemented
Global = _not_implemented
Constant = _not_implemented
GlobalDataSet = _not_implemented
PETScVecType = _not_implemented

def __getattribute__(self, key):
val = super().__getattribute__(key)
if val is _not_implemented:
            raise NotImplementedError(f"'{key}' is not implemented for backend"
                                      f" '{type(self).__name__}'.")
return val

def turn_on_offloading(self):
raise NotImplementedError()

def turn_off_offloading(self):
raise NotImplementedError()

@property
def cache_key(self):
raise NotImplementedError()
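
The __getattribute__ override turns every unpopulated slot into an eager failure, so a concrete backend only has to supply the pieces it actually supports. A minimal sketch with a hypothetical partial backend:

class PartialBackend(AbstractComputeBackend):
    Set = object  # stand-in implementation, for illustration only

backend = PartialBackend()
backend.Set          # resolves normally
try:
    backend.Dat      # slot still holds the _not_implemented sentinel
except NotImplementedError as e:
    print(e)         # "'Dat' is not implemented for backend 'PartialBackend'."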
279 changes: 279 additions & 0 deletions pyop2/backends/cpu.py
@@ -0,0 +1,279 @@
from pyop2.types.dat import Dat as BaseDat, MixedDat, DatView
from pyop2.types.set import Set, ExtrudedSet, Subset, MixedSet
from pyop2.types.dataset import DataSet, GlobalDataSet, MixedDataSet
from pyop2.types.map import Map, MixedMap
from pyop2.parloop import AbstractParloop
from pyop2.global_kernel import AbstractGlobalKernel
from pyop2.types.access import READ, INC, MIN, MAX
from pyop2.types.mat import Mat
from pyop2.types.glob import Global as BaseGlobal, Constant as BaseConstant
from pyop2.backends import AbstractComputeBackend
from petsc4py import PETSc
from pyop2 import (
    compilation,
    exceptions as ex,
    mpi,
    utils
)

import ctypes
import os
import loopy as lp
from contextlib import contextmanager
import numpy as np


class Dat(BaseDat):
@utils.cached_property
def _vec(self):
assert self.dtype == PETSc.ScalarType, \
"Can't create Vec with type %s, must be %s" % (self.dtype,
PETSc.ScalarType)
# Can't duplicate layout_vec of dataset, because we then
# carry around extra unnecessary data.
# But use getSizes to save an Allreduce in computing the
# global size.
size = self.dataset.layout_vec.getSizes()
data = self._data[:size[0]]
vec = PETSc.Vec().createWithArray(data, size=size,
bsize=self.cdim, comm=self.comm)
return vec

    @contextmanager
    def vec_context(self, access):
        """A context manager for a :class:`PETSc.Vec` from a :class:`Dat`.

        :param access: Access descriptor: READ, WRITE, or RW."""
# PETSc Vecs have a state counter and cache norm computations
# to return immediately if the state counter is unchanged.
# Since we've updated the data behind their back, we need to
# change that state counter.
self._vec.stateIncrease()
yield self._vec
if access is not READ:
self.halo_valid = False

def ensure_availability_on_device(self):
from pyop2.op2 import compute_backend
assert compute_backend is cpu_backend
        # data transfer is a no-op for the CPU backend

def ensure_availability_on_host(self):
from pyop2.op2 import compute_backend
assert compute_backend is cpu_backend
        # data transfer is a no-op for the CPU backend

@mpi.collective
def copy(self, other, subset=None):
if other is self:
return
if subset is None:
# If the current halo is valid we can also copy these values across.
if self.halo_valid:
other._data[:] = self.data_ro
other.halo_valid = True
else:
other.data[:] = self.data_ro
elif subset.superset != self.dataset.set:
raise ex.MapValueError("The subset and dataset are incompatible")
else:
other.data[subset.owned_indices] = self.data_ro[subset.owned_indices]
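
A usage sketch for the context manager above, assuming an existing CPU-backend Dat instance d with PETSc scalar dtype:

with d.vec_context(access=READ) as v:
    # v wraps d's owned data; the stateIncrease() above ensures
    # PETSc does not serve a stale cached norm.
    print(v.norm())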


class Global(BaseGlobal):
@utils.cached_property
def _vec(self):
assert self.dtype == PETSc.ScalarType, \
"Can't create Vec with type %s, must be %s" % (self.dtype,
PETSc.ScalarType)
# Can't duplicate layout_vec of dataset, because we then
# carry around extra unnecessary data.
# But use getSizes to save an Allreduce in computing the
# global size.
data = self._data
size = self.dataset.layout_vec.getSizes()
if self.comm.rank == 0:
return PETSc.Vec().createWithArray(data, size=size,
bsize=self.cdim,
comm=self.comm)
else:
return PETSc.Vec().createWithArray(np.empty(0, dtype=self.dtype),
size=size,
bsize=self.cdim,
comm=self.comm)

@contextmanager
def vec_context(self, access):
"""A context manager for a :class:`PETSc.Vec` from a :class:`Global`.

:param access: Access descriptor: READ, WRITE, or RW."""
yield self._vec
if access is not READ:
data = self._data
self.comm.Bcast(data, 0)

def ensure_availability_on_device(self):
from pyop2.op2 import compute_backend
assert compute_backend is cpu_backend
        # data transfer is a no-op for the CPU backend

def ensure_availability_on_host(self):
from pyop2.op2 import compute_backend
assert compute_backend is cpu_backend
        # data transfer is a no-op for the CPU backend


class Constant(BaseConstant):
@utils.cached_property
def _vec(self):
assert self.dtype == PETSc.ScalarType, \
"Can't create Vec with type %s, must be %s" % (self.dtype,
PETSc.ScalarType)
# Can't duplicate layout_vec of dataset, because we then
# carry around extra unnecessary data.
# But use getSizes to save an Allreduce in computing the
# global size.
data = self._data
size = self.dataset.layout_vec.getSizes()
if self.comm.rank == 0:
return PETSc.Vec().createWithArray(data, size=size,
bsize=self.cdim,
comm=self.comm)
else:
return PETSc.Vec().createWithArray(np.empty(0, dtype=self.dtype),
size=size,
bsize=self.cdim,
comm=self.comm)

@contextmanager
def vec_context(self, access):
"""A context manager for a :class:`PETSc.Vec` from a :class:`Global`.

:param access: Access descriptor: READ, WRITE, or RW."""
yield self._vec
if access is not READ:
data = self._data
self.comm.Bcast(data, 0)

def ensure_availability_on_device(self):
from pyop2.op2 import compute_backend
assert compute_backend is cpu_backend
        # data transfer is a no-op for the CPU backend

def ensure_availability_on_host(self):
from pyop2.op2 import compute_backend
assert compute_backend is cpu_backend
        # data transfer is a no-op for the CPU backend
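
For both Global and Constant the underlying Vec keeps its entries on rank 0 only (the other ranks wrap an empty array), so writes made through the Vec must be rebroadcast, which is what the Bcast in vec_context does. The same pattern in isolation, as an mpi4py sketch:

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
data = np.zeros(3)
if comm.rank == 0:
    data[:] = [1.0, 2.0, 3.0]  # rank 0 holds the authoritative values
comm.Bcast(data, root=0)       # afterwards every rank agrees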


class GlobalKernel(AbstractGlobalKernel):

@utils.cached_property
def code_to_compile(self):
"""Return the C/C++ source code as a string."""
from pyop2.codegen.rep2loopy import generate

wrapper = generate(self.builder)
code = lp.generate_code_v2(wrapper)

if self.local_kernel.cpp:
from loopy.codegen.result import process_preambles
preamble = "".join(
process_preambles(getattr(code, "device_preambles", [])))
device_code = "\n\n".join(str(dp.ast) for dp in code.device_programs)
return preamble + '\nextern "C" {\n' + device_code + "\n}\n"
return code.device_code()

@PETSc.Log.EventDecorator()
@mpi.collective
def compile(self, comm):
"""Compile the kernel.

:arg comm: The communicator the compilation is collective over.
:returns: A ctypes function pointer for the compiled function.
"""
extension = "cpp" if self.local_kernel.cpp else "c"
cppargs = (
tuple("-I%s/include" % d for d in utils.get_petsc_dir())
+ tuple("-I%s" % d for d in self.local_kernel.include_dirs)
+ ("-I%s" % os.path.abspath(os.path.dirname(__file__)),)
)
ldargs = (
tuple("-L%s/lib" % d for d in utils.get_petsc_dir())
+ tuple("-Wl,-rpath,%s/lib" % d for d in utils.get_petsc_dir())
+ ("-lpetsc", "-lm")
+ tuple(self.local_kernel.ldargs)
)

return compilation.load(self, extension, self.name,
cppargs=cppargs,
ldargs=ldargs,
restype=ctypes.c_int,
comm=comm)


class Parloop(AbstractParloop):
@PETSc.Log.EventDecorator("ParLoopRednBegin")
@mpi.collective
def reduction_begin(self):
"""Begin reductions."""
requests = []
for idx in self._reduction_idxs:
glob = self.arguments[idx].data
mpi_op = {INC: mpi.MPI.SUM,
MIN: mpi.MPI.MIN,
MAX: mpi.MPI.MAX}.get(self.accesses[idx])

if mpi.MPI.VERSION >= 3:
requests.append(self.comm.Iallreduce(glob._data,
glob._buf,
op=mpi_op))
else:
self.comm.Allreduce(glob._data, glob._buf, op=mpi_op)
return tuple(requests)
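
Splitting the reduction into begin/end phases lets the non-blocking Iallreduce (available from MPI 3 onwards) overlap with other computation. The same pattern in isolation, as an mpi4py sketch:

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
local = np.array([float(comm.rank)])
result = np.empty_like(local)

req = comm.Iallreduce(local, result, op=MPI.SUM)
# ... unrelated work can proceed while the reduction is in flight ...
req.Wait()  # result now holds the sum over all ranks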

@PETSc.Log.EventDecorator("ParLoopRednEnd")
@mpi.collective
def reduction_end(self, requests):
"""Finish reductions."""
if mpi.MPI.VERSION >= 3:
mpi.MPI.Request.Waitall(requests)
for idx in self._reduction_idxs:
glob = self.arguments[idx].data
glob._data[:] = glob._buf
else:
assert len(requests) == 0

for idx in self._reduction_idxs:
glob = self.arguments[idx].data
glob._data[:] = glob._buf


class CPUBackend(AbstractComputeBackend):
GlobalKernel = GlobalKernel
Parloop = Parloop
Set = Set
ExtrudedSet = ExtrudedSet
MixedSet = MixedSet
Subset = Subset
DataSet = DataSet
MixedDataSet = MixedDataSet
Map = Map
MixedMap = MixedMap
Dat = Dat
MixedDat = MixedDat
DatView = DatView
Mat = Mat
Global = Global
Constant = Constant
GlobalDataSet = GlobalDataSet
PETScVecType = PETSc.Vec.Type.STANDARD

def turn_on_offloading(self):
pass

def turn_off_offloading(self):
pass

@property
def cache_key(self):
return (type(self),)


cpu_backend = CPUBackend()
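
With the module-level singleton above, callers can pull every backend-specific type from a single object. A hypothetical usage sketch (names outside this diff are assumptions):

from pyop2.backends.cpu import cpu_backend

Dat = cpu_backend.Dat        # the CPU Dat defined in this file
key = cpu_backend.cache_key  # (CPUBackend,), suitable for keying compiled-code caches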