-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #233 from JuliaGPU/tb/kernelabstractions
Initial KernelAbstractions.jl integration.
- Loading branch information
Showing
7 changed files
with
224 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
module OpenCLKernels | ||
|
||
using ..OpenCL | ||
using ..OpenCL: @device_override, SPIRVIntrinsics | ||
|
||
import KernelAbstractions as KA | ||
|
||
import StaticArrays | ||
|
||
import Adapt | ||
|
||
|
||
## Back-end Definition | ||
|
||
export OpenCLBackend | ||
|
||
"""
    OpenCLBackend()

KernelAbstractions back-end type for OpenCL. It is a field-less subtype of `KA.GPU`
and is used purely for dispatch by the methods in this module.
"""
struct OpenCLBackend <: KA.GPU end
|
||
# Array constructors for the OpenCL back-end. Each takes the element type `T` and a
# tuple of dimensions and forwards to the corresponding `CLArray` / `OpenCL` constructor.

# Uninitialised `CLArray{T}` of size `dims`.
function KA.allocate(::OpenCLBackend, ::Type{T}, dims::Tuple) where {T}
    return CLArray{T}(undef, dims)
end

# Array of size `dims` produced by `OpenCL.zeros`.
function KA.zeros(::OpenCLBackend, ::Type{T}, dims::Tuple) where {T}
    return OpenCL.zeros(T, dims)
end

# Array of size `dims` produced by `OpenCL.ones`.
function KA.ones(::OpenCLBackend, ::Type{T}, dims::Tuple) where {T}
    return OpenCL.ones(T, dims)
end
|
||
# Every `CLArray` belongs to the OpenCL back-end.
function KA.get_backend(::CLArray)
    return OpenCLBackend()
end

# Synchronising the back-end delegates to `cl.device_synchronize()`.
function KA.synchronize(::OpenCLBackend)
    return cl.device_synchronize()
end

# Float64 support is reported as unavailable unconditionally.
# XXX: this is platform/device dependent
function KA.supports_float64(::OpenCLBackend)
    return false
end
|
||
# Storage adaptation rules between host `Array`s and `CLArray`s.

# Host array -> OpenCL back-end: adapt to `CLArray`.
function Adapt.adapt_storage(::OpenCLBackend, a::Array)
    return Adapt.adapt(CLArray, a)
end

# Already a `CLArray`: returned as-is.
function Adapt.adapt_storage(::OpenCLBackend, a::CLArray)
    return a
end

# `CLArray` -> CPU back-end: convert to a host `Array`.
function Adapt.adapt_storage(::KA.CPU, a::CLArray)
    return convert(Array, a)
end
|
||
|
||
## Memory Operations | ||
|
||
# Copy `B` into `A` by forwarding to the generic `copyto!`; its result is returned.
# TODO: address device-to-host copies being synchronizing
KA.copyto!(::OpenCLBackend, A, B) = copyto!(A, B)
|
||
|
||
## Kernel Launch | ||
|
||
# Build the `KA.CompilerMetadata` passed to a kernel as its context. This three-argument
# form always uses `KA.DynamicCheck` as the bounds-checking parameter.
function KA.mkcontext(kernel::KA.Kernel{OpenCLBackend}, _ndrange, iterspace)
    Metadata = KA.CompilerMetadata{KA.ndrange(kernel), KA.DynamicCheck}
    return Metadata(_ndrange, iterspace)
end

# Variant that additionally takes an index `I` and takes the bounds-checking parameter
# from the type of the (unnamed) last argument.
function KA.mkcontext(kernel::KA.Kernel{OpenCLBackend}, I, _ndrange, iterspace,
                      ::Dynamic) where {Dynamic}
    Metadata = KA.CompilerMetadata{KA.ndrange(kernel), Dynamic}
    return Metadata(I, _ndrange, iterspace)
end
|
||
# Normalise the user-supplied `ndrange` / `workgroupsize` and partition the iteration
# space. Returns `(ndrange, workgroupsize, iterspace, dynamic)`, where the last two
# come from `KA.partition`.
function KA.launch_config(kernel::KA.Kernel{OpenCLBackend}, ndrange, workgroupsize)
    # scalar sizes are wrapped into 1-tuples
    ndrange isa Integer && (ndrange = (ndrange,))
    workgroupsize isa Integer && (workgroupsize = (workgroupsize,))

    # statically-sized kernel: drop the runtime ndrange
    # (original note: partition checked that the ndrange's agreed)
    if KA.ndrange(kernel) <: KA.StaticSize
        ndrange = nothing
    end

    # with a dynamic workgroupsize and none requested, the ndrange doubles as a
    # preliminary workgroupsize for autotuning
    autotune = KA.workgroupsize(kernel) <: KA.DynamicSize && workgroupsize === nothing
    preliminary = autotune ? ndrange : workgroupsize
    iterspace, dynamic = KA.partition(kernel, ndrange, preliminary)

    return ndrange, workgroupsize, iterspace, dynamic
end
|
||
"""
    threads_to_workgroupsize(threads, ndrange)

Distribute a budget of `threads` work-items over the dimensions of `ndrange`.

Dimensions are visited in order. Each one receives `div(threads, total)` work-items,
where `total` is the product of the sizes handed out so far, capped at that dimension's
extent. The result has one entry per entry of `ndrange`.

For example, `threads_to_workgroupsize(256, (1000, 1000))` gives `(256, 1)` and
`threads_to_workgroupsize(256, (16, 1000, 4))` gives `(16, 16, 1)`.

Note that an extent of zero makes the running product zero, so with integer arguments
any later dimension hits `div(threads, 0)`.
"""
function threads_to_workgroupsize(threads, ndrange)
    # Keep the running product in a `Ref`: rebinding a captured local inside the
    # closure would force it into a `Core.Box` and defeat type inference.
    total = Ref(one(threads))
    return map(ndrange) do n
        x = min(div(threads, total[]), n)
        total[] *= x
        return x
    end
end
|
||
# Launch a KernelAbstractions kernel on the OpenCL back-end.
#
# Keywords `ndrange` and `workgroupsize` are forwarded to `KA.launch_config`. The kernel
# is compiled with `@opencl launch=false`; if the kernel's workgroupsize is dynamic and
# none was given, it is derived from `cl.work_group_info` before launching. Always
# returns `nothing`, and launches nothing when the iteration space has no groups.
function (obj::KA.Kernel{OpenCLBackend})(args...; ndrange=nothing, workgroupsize=nothing)
    ndrange, workgroupsize, iterspace, dynamic =
        KA.launch_config(obj, ndrange, workgroupsize)

    # preliminary context: it is rebuilt below if the workgroupsize gets tuned
    ctx = KA.mkcontext(obj, ndrange, iterspace)
    kernel = @opencl launch=false obj.f(ctx, args...)

    # pick a workgroupsize automatically when the caller left it open
    autotune = KA.workgroupsize(obj) <: KA.DynamicSize && workgroupsize === nothing
    if autotune
        budget = cl.work_group_info(kernel.fun, cl.device()).size
        tuned = threads_to_workgroupsize(budget, ndrange)
        iterspace, dynamic = KA.partition(obj, ndrange, tuned)
        ctx = KA.mkcontext(obj, ndrange, iterspace)
    end

    groups = length(KA.blocks(iterspace))
    items = length(KA.workitems(iterspace))

    # nothing to do for an empty iteration space
    groups == 0 && return nothing

    # launch with linearised sizes: `items` work-items in each of `groups` groups
    kernel(ctx, args...; global_size=groups * items, local_size=items)

    return nothing
end
|
||
|
||
## Indexing Functions | ||
|
||
# Linear indices map directly onto the first dimension of the OpenCL index intrinsics;
# `ctx` is not consulted.

# linear index of the work-item within its group
@device_override @inline KA.__index_Local_Linear(ctx) = get_local_id(1)

# linear index of the group
@device_override @inline KA.__index_Group_Linear(ctx) = get_group_id(1)

# linear index of the work-item within the whole launch
@device_override @inline KA.__index_Global_Linear(ctx) = get_global_id(1)
|
||
# Cartesian indices are obtained by looking the linear OpenCL ids up in the iteration
# space stored in `ctx`. All look-ups are performed under `@inbounds`.

# entry of the work-item range selected by the local id
@device_override @inline function KA.__index_Local_Cartesian(ctx)
    return @inbounds begin
        items = KA.workitems(KA.__iterspace(ctx))
        items[get_local_id(1)]
    end
end

# entry of the block range selected by the group id
@device_override @inline function KA.__index_Group_Cartesian(ctx)
    return @inbounds begin
        groups = KA.blocks(KA.__iterspace(ctx))
        groups[get_group_id(1)]
    end
end

# global index: `KA.expand` combines the group id and the local id
@device_override @inline function KA.__index_Global_Cartesian(ctx)
    return @inbounds begin
        space = KA.__iterspace(ctx)
        KA.expand(space, get_group_id(1), get_local_id(1))
    end
end
|
||
# Report whether the current work-item should do any work. Without dynamic bounds
# checking every work-item is valid; otherwise its expanded global index must lie
# inside the ndrange stored in `ctx`.
@device_override @inline function KA.__validindex(ctx)
    KA.__dynamic_checkbounds(ctx) || return true
    I = @inbounds KA.expand(KA.__iterspace(ctx), get_group_id(1), get_local_id(1))
    return I in KA.__ndrange(ctx)
end
|
||
|
||
## Shared and Scratch Memory | ||
|
||
# Shared memory of element type `T` and shape `Dims`: `prod(Dims)` elements are
# requested through `SPIRVIntrinsics.emit_localmemory` and the resulting pointer is
# wrapped in a `CLDeviceArray` of shape `Dims`. The `Id` parameter is not used here.
@device_override @inline function KA.SharedMemory(::Type{T}, ::Val{Dims}, ::Val{Id}) where {T, Dims, Id}
    return CLDeviceArray(Dims, SPIRVIntrinsics.emit_localmemory(T, Val(prod(Dims))))
end
|
||
# Scratch storage is an uninitialised `StaticArrays.MArray` with element type `T`,
# whose size parameter is `KA.__size(Dims)`. The `ctx` argument is not used.
@device_override @inline function KA.Scratchpad(ctx, ::Type{T}, ::Val{Dims}) where {T, Dims}
    Storage = StaticArrays.MArray{KA.__size(Dims), T}
    return Storage(undef)
end
|
||
|
||
## Synchronization and Printing | ||
|
||
# In-kernel synchronisation is implemented by calling `barrier()`.
@device_override @inline KA.__synchronize() = barrier()
|
||
# In-kernel printing forwards all arguments to `SPIRVIntrinsics._print`.
@device_override @inline KA.__print(args...) = SPIRVIntrinsics._print(args...)
|
||
|
||
## Other | ||
|
||
# Kernel arguments are converted with `clconvert` before being passed to the kernel.
function KA.argconvert(::KA.Kernel{OpenCLBackend}, arg)
    return clconvert(arg)
end
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,12 @@ | ||
[deps] | ||
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" | ||
IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89" | ||
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" | ||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | ||
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" | ||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | ||
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" | ||
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" | ||
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" | ||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | ||
pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Run the KernelAbstractions.jl test suite against the OpenCL back-end.
# The suite only runs when the current device lists the `cl_khr_il_program` extension;
# otherwise a warning naming the platform is logged and the suite is skipped.
if "cl_khr_il_program" in cl.device().extensions

    import KernelAbstractions
    include(joinpath(dirname(pathof(KernelAbstractions)), "..", "test", "testsuite.jl"))

    # test sets that are not run on this back-end
    skip_tests = Set([
        "sparse",
        "Convert",  # Need to opt out of i128
    ])
    Testsuite.testsuite(OpenCLBackend, "OpenCL", OpenCL, CLArray, CLDeviceArray;
                        skip_tests=skip_tests)

else
    @warn "Skipping KernelAbstractions.jl tests on $(cl.platform().name)"
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters