Skip to content

Commit

Permalink
Add native vadd example.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Sep 12, 2024
1 parent b541f6c commit 86870ac
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 54 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
SPIRV_LLVM_Translator_unified_jll = "85f0d8ed-5b39-5caa-b1ae-7472de402361"

[compat]
LLVM = "9.1"
OpenCL_jll = "2024.5.8"
julia = "1.10"
LLVM = "9.1"
36 changes: 0 additions & 36 deletions examples/demo.jl

This file was deleted.

27 changes: 27 additions & 0 deletions examples/vadd.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using OpenCL, pocl_jll, Test

# OpenCL C source for the `vadd` kernel: each work-item adds one pair of elements.
const source = """
__kernel void vadd(__global const float *a,
__global const float *b,
__global float *c) {
int i = get_global_id(0);
c[i] = a[i] + b[i];
}"""

# Host-side inputs. The values are rounded to whole numbers in [0, 100], so the
# Float32 sums computed on the host and on the device are exactly representable.
dims = (2,)
a = round.(rand(Float32, dims) * 100)
b = round.(rand(Float32, dims) * 100)
c = similar(a)

# Device-side arrays constructed from the host arrays (`d_c` only serves as the
# output buffer; its initial contents are never read by the kernel).
d_a = CLArray(a)
d_b = CLArray(b)
d_c = CLArray(c)

# Build the program from the source string and look up the kernel by name.
prog = cl.Program(; source) |> cl.build!
kern = cl.Kernel(prog, "vadd")

# Launch one work-item per element; the tuple type lists the kernel argument types.
len = prod(dims)
clcall(kern, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}},
       d_a, d_b, d_c; global_size=(len,))

# Copy the result back to the host and compare it with the host-side sum.
c = Array(d_c)
@test a + b ≈ c
21 changes: 21 additions & 0 deletions examples/vadd_native.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using OpenCL, pocl_jll, Test

"""
    vadd(a, b, c)

Device kernel that stores `a[i] + b[i]` into `c[i]`, where `i` is the value
returned by `get_global_id()` for the executing work-item.

Indexing is wrapped in `@inbounds`, so the launch size must not exceed the
length of the arrays.
"""
function vadd(a, b, c)
    idx = get_global_id()
    @inbounds c[idx] = a[idx] + b[idx]
    return nothing
end

# Host-side inputs. The values are rounded to whole numbers in [0, 100], so the
# Float32 sums computed on the host and on the device are exactly representable.
dims = (2,)
a = round.(rand(Float32, dims) * 100)
b = round.(rand(Float32, dims) * 100)
c = similar(a)

# Device-side arrays constructed from the host arrays (`d_c` only serves as the
# output buffer; its initial contents are never read by the kernel).
d_a = CLArray(a)
d_b = CLArray(b)
d_c = CLArray(c)

# Launch the Julia function `vadd` as a kernel with one work-item per element.
len = prod(dims)
@opencl global_size=len vadd(d_a, d_b, d_c)

# Copy the result back to the host and compare it with the host-side sum.
c = Array(d_c)
@test a + b ≈ c
9 changes: 6 additions & 3 deletions lib/cl/kernel.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# OpenCL.Kernel

export clcall

mutable struct Kernel <: CLObject
id::cl_kernel

Expand Down Expand Up @@ -69,7 +72,7 @@ function set_arg!(k::Kernel, idx::Integer, arg::SVMBuffer)
clSetKernelArgSVMPointer(k, cl_uint(idx-1), arg.ptr)
return k
end
## when passing with `cl.clcall`, which has pre-converted the buffer
## when passing with `clcall`, which has pre-converted the buffer
function set_arg!(k::Kernel, idx::Integer, arg::Ptr)
if arg != C_NULL
clSetKernelArgSVMPointer(k, cl_uint(idx-1), arg)
Expand Down Expand Up @@ -213,8 +216,8 @@ clcall(f::F, types::Tuple, args::Vararg{Any,N}; kwargs...) where {N,F} =
clcall(f, _to_tuple_type(types), args...; kwargs...)

function clcall(k::Kernel, types::Type{T}, args::Vararg{Any,N}; kwargs...) where {T,N}
call_closure = function (pointers::Vararg{Any,N})
call(k, pointers...; kwargs...)
call_closure = function (converted_args::Vararg{Any,N})
call(k, converted_args...; kwargs...)
end
convert_arguments(call_closure, types, args...)
end
Expand Down
3 changes: 2 additions & 1 deletion src/OpenCL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ using GPUCompiler
using LLVM, LLVM.Interop
using SPIRV_LLVM_Translator_unified_jll
using Adapt
using Reexport

using Core: LLVMPtr

# library wrappers
include("../lib/cl/CL.jl")
using .cl
@reexport using .cl
export cl

# device functionality
Expand Down
2 changes: 1 addition & 1 deletion src/compiler/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ abstract type AbstractKernel{F,TT} end

quote
svm_pointers = Ptr{Cvoid}[]
cl.clcall(kernel.fun, $call_tt, $(call_args...); svm_pointers, call_kwargs...)
clcall(kernel.fun, $call_tt, $(call_args...); svm_pointers, call_kwargs...)
end
end

Expand Down
8 changes: 4 additions & 4 deletions test/behaviour.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
prg = cl.Program(source=hello_world_kernel) |> cl.build!
kern = cl.Kernel(prg, "hello")

cl.clcall(kern, Tuple{Ptr{Cchar}}, out_arr; global_size=str_len)
clcall(kern, Tuple{Ptr{Cchar}}, out_arr; global_size=str_len)
h = Array(out_arr)

@test hello_world_str == GC.@preserve h unsafe_string(pointer(h))
Expand Down Expand Up @@ -212,8 +212,8 @@ end
R_arr = CLArray{Float32}(undef, 10; access=:w)

global_size = size(X)
cl.clcall(part3, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Params}},
X_arr, Y_arr, R_arr, P_arr; global_size)
clcall(part3, Tuple{Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Params}},
X_arr, Y_arr, R_arr, P_arr; global_size)

r = Array(R_arr)
@test all(x -> x == 13.5, r)
Expand Down Expand Up @@ -251,7 +251,7 @@ end

P = MutableParams(0.5, 10.0)
P_arr = CLArray{Float32}(undef, 2)
cl.clcall(part3, Tuple{Ptr{Float32}, MutableParams}, P_arr, P)
clcall(part3, Tuple{Ptr{Float32}, MutableParams}, P_arr, P)

r = Array(P_arr)

Expand Down
16 changes: 8 additions & 8 deletions test/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,17 @@
k = cl.Kernel(p, "test")

# dimensions must be the same size
@test_throws ArgumentError cl.clcall(k, Tuple{Ptr{Float32}}, d_arr;
global_size=(1,), local_size=(1,1))
@test_throws ArgumentError cl.clcall(k, Tuple{Ptr{Float32}}, d_arr;
global_size=(1,1), local_size=(1,))
@test_throws ArgumentError clcall(k, Tuple{Ptr{Float32}}, d_arr;
global_size=(1,), local_size=(1,1))
@test_throws ArgumentError clcall(k, Tuple{Ptr{Float32}}, d_arr;
global_size=(1,1), local_size=(1,))

# dimensions are bounded
max_work_dim = cl.device().max_work_item_dims
bad = tuple([1 for _ in 1:(max_work_dim + 1)])

# calls are asynchronous, but cl.read blocks
cl.clcall(k, Tuple{Ptr{Float32}}, d_arr)
clcall(k, Tuple{Ptr{Float32}}, d_arr)
@test Array(d_arr) == [2f0]

# enqueue task is an alias for calling
Expand All @@ -132,7 +132,7 @@
structkernel = cl.Kernel(prg, "structest")
out = CLArray{Float32}(undef, 2)
bstruct = (1, Int32(4))
cl.clcall(structkernel, Tuple{Ptr{Float32}, Tuple{Clong, Cint}}, out, bstruct)
clcall(structkernel, Tuple{Ptr{Float32}, Tuple{Clong, Cint}}, out, bstruct)
@test Array(out) == [1f0, 4f0]
end

Expand All @@ -155,8 +155,8 @@
# (only on some platforms)
vec3_a = (1f0, 2f0, 3f0, 0f0)
vec3_b = (4f0, 5f0, 6f0, 0f0)
cl.clcall(vec3kernel, Tuple{Ptr{Float32}, NTuple{4,Float32}, NTuple{4,Float32}},
out, vec3_a, vec3_b)
clcall(vec3kernel, Tuple{Ptr{Float32}, NTuple{4,Float32}, NTuple{4,Float32}},
out, vec3_a, vec3_b)
@test Array(out) == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0]
end
end

0 comments on commit 86870ac

Please sign in to comment.