diff --git a/pyop2/gpu/cuda.py b/pyop2/gpu/cuda.py index b0e48b29d..93499bbab 100644 --- a/pyop2/gpu/cuda.py +++ b/pyop2/gpu/cuda.py @@ -85,10 +85,21 @@ def _kernel_args_(self): class Arg(Arg): """ - Arg for GPU + Arg for GPU. """ +class ExtrudedSet(ExtrudedSet): + """ + ExtrudedSet for GPU. + """ + @cached_property + def _kernel_args_(self): + m_gpu = cuda.mem_alloc(int(self.layers_array.nbytes)) + cuda.memcpy_htod(m_gpu, self.layers_array) + return (m_gpu,) + + class Dat(petsc_Dat): """ Dat for GPU. @@ -373,11 +384,9 @@ def argtypes(self): @cached_property def argshapes(self): argshapes = ((), ()) - # argtypes += self._iterset._argtypes_ if self._iterset._argtypes_: - raise NotImplementedError("Do not know what to do when" - " self._iterset._argtypes is not empty, is this the case" - " when we have extruded mesh") + # TODO: placeholder shape for the iterset argument; verify nothing relies on it. + argshapes += ((), ) for arg in self._args: argshapes += (arg.data.shape, ) diff --git a/pyop2/gpu/snpt.py b/pyop2/gpu/snpt.py index f22b1f184..66c72550f 100644 --- a/pyop2/gpu/snpt.py +++ b/pyop2/gpu/snpt.py @@ -3,7 +3,9 @@ def snpt_transform(kernel, block_size): """ - SNPT := Single 'n' Per Thread transformation. + SNPT := Single 'n' Per Thread. + + Implements an outer-loop parallelization strategy. PyOP2 uses 'n' as the outer loop iname. In Firedrake 'n' might denote either a cell or a DOF.