diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 00e7f238..23521bd0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,12 +18,13 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        version: ['1.10', 'pre'] # 'nightly'
+        version: ['1.10', 'pre', 'nightly']
         os: [ubuntu-latest, macOS-latest, windows-latest]
         arch: [x64]
         llvm_args: ['']
         include:
           # starting with Julia 1.10, we can enable opaque pointers
+          # from Julia 1.12 on, this is the default.
           - version: '1.10'
             os: 'ubuntu-latest'
             arch: 'x64'
@@ -48,18 +49,6 @@ jobs:
             os: 'windows-latest'
             arch: 'x64'
             llvm_args: '--opaque-pointers'
-          #- version: 'nightly'
-          #  os: 'ubuntu-latest'
-          #  arch: 'x64'
-          #  llvm_args: '--opaque-pointers'
-          #- version: 'nightly'
-          #  os: 'macOS-latest'
-          #  arch: 'x64'
-          #  llvm_args: '--opaque-pointers'
-          #- version: 'nightly'
-          #  os: 'windows-latest'
-          #  arch: 'x64'
-          #  llvm_args: '--opaque-pointers'

     steps:
       - uses: actions/checkout@v4
diff --git a/src/interface.jl b/src/interface.jl
index be4a65b8..1346c858 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -252,7 +252,11 @@ method_table(@nospecialize(job::CompilerJob)) = GLOBAL_METHOD_TABLE

 # the inference parameters to use when constructing the GPUInterpreter
 function inference_params(@nospecialize(job::CompilerJob))
-    return CC.InferenceParams(; unoptimize_throw_blocks=false)
+    if VERSION >= v"1.12.0-DEV.1017"
+        CC.InferenceParams()
+    else
+        CC.InferenceParams(; unoptimize_throw_blocks=false)
+    end
 end

 # the optimization parameters to use when constructing the GPUInterpreter
diff --git a/src/utils.jl b/src/utils.jl
index 6ad39bae..6d10927f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -95,6 +95,12 @@ end
 # XXX: it's not allowed to switch tasks while under this lock, can we guarantee that?
 #      its probably easier to start using our own LLVM context when that's possible.
 macro locked(ex)
+    if VERSION >= v"1.12.0-DEV.769"
+        # no need to handle locking; it's taken care of by the engine
+        # as long as we use a correct cache owner token.
+        return esc(ex)
+    end
+
     def = splitdef(ex)
     def[:body] = quote
         ccall(:jl_typeinf_lock_begin, Cvoid, ())
@@ -109,6 +115,10 @@ end

 # HACK: temporarily unlock again to perform a task switch
 macro unlocked(ex)
+    if VERSION >= v"1.12.0-DEV.769"
+        return esc(ex)
+    end
+
     def = splitdef(ex)
     def[:body] = quote
         ccall(:jl_typeinf_lock_end, Cvoid, ())
diff --git a/src/validation.jl b/src/validation.jl
index e1a355b2..93e18950 100644
--- a/src/validation.jl
+++ b/src/validation.jl
@@ -218,6 +218,18 @@ function check_ir!(job, errors::Vector{IRError}, inst::LLVM.CallInst)
             @safe_debug "Decoding arguments to jl_get_binding_or_error failed" inst bb=LLVM.parent(inst)
             push!(errors, (DELAYED_BINDING, bt, nothing))
         end
+    elseif fn == "jl_reresolve_binding_value_seqcst" || fn == "ijl_reresolve_binding_value_seqcst"
+        try
+            # pry the binding from the IR
+            expr = arguments(inst)[1]::ConstantExpr
+            expr = first(operands(expr))::ConstantInt # get rid of inttoptr
+            ptr = Ptr{Any}(convert(Int, expr))
+            obj = Base.unsafe_pointer_to_objref(ptr)
+            push!(errors, (DELAYED_BINDING, bt, obj.globalref))
+        catch e
+            @safe_debug "Decoding arguments to jl_reresolve_binding_value_seqcst failed" inst bb=LLVM.parent(inst)
+            push!(errors, (DELAYED_BINDING, bt, nothing))
+        end
     elseif fn == "jl_invoke" || fn == "ijl_invoke"
         try
             f, args, nargs, meth = arguments(inst)
diff --git a/test/gcn_tests.jl b/test/gcn_tests.jl
index 1536dbed..0fb64f24 100644
--- a/test/gcn_tests.jl
+++ b/test/gcn_tests.jl
@@ -74,44 +74,51 @@ end

     # bug: depending on a child function from multiple parents resulted in
     # the child only being present once
-    @noinline child(i) = sink_gcn(i)
-    function parent1(i)
-        child(i)
-        return
+    mod = @eval module $(gensym())
+        export child, parent1, parent2
+
+        @noinline child(i) = sink_gcn(i)
+        function parent1(i)
+            child(i)
+            return
+        end
+        function parent2(i)
+            child(i+1)
+            return
+        end
     end

-    asm = sprint(io->GCN.code_native(io, parent1, Tuple{Int}; dump_module=true))
+    asm = sprint(io->GCN.code_native(io, mod.parent1, Tuple{Int}; dump_module=true))
     @test occursin(r"\.type.*julia_[[:alnum:]_.]*child_\d*,@function", asm)

-    function parent2(i)
-        child(i+1)
-        return
-    end
-
-    asm = sprint(io->GCN.code_native(io, parent2, Tuple{Int}; dump_module=true))
+    asm = sprint(io->GCN.code_native(io, mod.parent2, Tuple{Int}; dump_module=true))
     @test occursin(r"\.type.*julia_[[:alnum:]_.]*child_\d*,@function", asm)
 end

 @testset "child function reuse bis" begin
     # bug: similar, but slightly different issue as above
     # in the case of two child functions
-    @noinline child1(i) = sink_gcn(i)
-    @noinline child2(i) = sink_gcn(i+1)
-    function parent1(i)
-        child1(i) + child2(i)
-        return
+
+    mod = @eval module $(gensym())
+        export parent1, parent2, child1, child2
+
+        @noinline child1(i) = sink_gcn(i)
+        @noinline child2(i) = sink_gcn(i+1)
+        function parent1(i)
+            child1(i) + child2(i)
+            return
+        end
+        function parent2(i)
+            child1(i+1) + child2(i+1)
+            return
+        end
     end

-    asm = sprint(io->GCN.code_native(io, parent1, Tuple{Int}; dump_module=true))
+    asm = sprint(io->GCN.code_native(io, mod.parent1, Tuple{Int}; dump_module=true))
     @test occursin(r"\.type.*julia_[[:alnum:]_.]*child1_\d*,@function", asm)
     @test occursin(r"\.type.*julia_[[:alnum:]_.]*child2_\d*,@function", asm)

-    function parent2(i)
-        child1(i+1) + child2(i+1)
-        return
-    end
-
-    asm = sprint(io->GCN.code_native(io, parent2, Tuple{Int}; dump_module=true))
+    asm = sprint(io->GCN.code_native(io, mod.parent2, Tuple{Int}; dump_module=true))
     @test occursin(r"\.type.*julia_[[:alnum:]_.]*child1_\d*,@function", asm)
     @test occursin(r"\.type.*julia_[[:alnum:]_.]*child2_\d*,@function", asm)
 end
diff --git a/test/native_tests.jl b/test/native_tests.jl
index 298c1010..2b4b8b48 100644
--- a/test/native_tests.jl
+++ b/test/native_tests.jl
@@ -388,46 +388,61 @@ Base.unsafe_trunc(::Type{Int}, x::CleverType) = unsafe_trunc(Int, x.x)
 end

 @testset "invalid LLVM IR" begin
-    foobar(i) = println(i)
+    mod = @eval module $(gensym())
+        export foobar
+        foobar(i) = println(i)
+    end

     @test_throws_message(InvalidIRError,
-                         Native.code_execution(foobar, Tuple{Int})) do msg
+                         Native.code_execution(mod.foobar, Tuple{Int})) do msg
         occursin("invalid LLVM IR", msg) &&
         (occursin(GPUCompiler.RUNTIME_FUNCTION, msg) ||
          occursin(GPUCompiler.UNKNOWN_FUNCTION, msg) ||
          occursin(GPUCompiler.DYNAMIC_CALL, msg)) &&
         occursin("[1] println", msg) &&
-        occursin(r"\[2\] .*foobar", msg)
+        occursin("[2] foobar", msg)
     end
 end

 @testset "invalid LLVM IR (ccall)" begin
-    foobar(p) = (unsafe_store!(p, ccall(:time, Cint, ())); nothing)
+    mod = @eval module $(gensym())
+        export foobar
+        function foobar(p)
+            unsafe_store!(p, ccall(:time, Cint, ()))
+            return
+        end
+    end

     @test_throws_message(InvalidIRError,
-                         Native.code_execution(foobar, Tuple{Ptr{Int}})) do msg
+                         Native.code_execution(mod.foobar, Tuple{Ptr{Int}})) do msg
         if VERSION >= v"1.11-"
             occursin("invalid LLVM IR", msg) &&
             occursin(GPUCompiler.LAZY_FUNCTION, msg) &&
             occursin("call to time", msg) &&
-            occursin(r"\[1\] .*foobar", msg)
+            occursin("[1] foobar", msg)
         else
             occursin("invalid LLVM IR", msg) &&
             occursin(GPUCompiler.POINTER_FUNCTION, msg) &&
-            occursin(r"\[1\] .*foobar", msg)
+            occursin("[1] foobar", msg)
         end
     end
 end

 @testset "delayed bindings" begin
-    kernel() = (undefined; return)
+    mod = @eval module $(gensym())
+        export kernel
+        function kernel()
+            undefined
+            return
+        end
+    end

     @test_throws_message(InvalidIRError,
-                         Native.code_execution(kernel, Tuple{})) do msg
+                         Native.code_execution(mod.kernel, Tuple{})) do msg
         occursin("invalid LLVM IR", msg) &&
         occursin(GPUCompiler.DELAYED_BINDING, msg) &&
-        occursin("use of 'undefined'", msg) &&
-        occursin(r"\[1\] .*kernel", msg)
+        occursin(r"use of '.*undefined'", msg) &&
+        occursin("[1] kernel", msg)
     end
 end
@@ -442,15 +457,18 @@ end
         occursin("invalid LLVM IR", msg) &&
         occursin(GPUCompiler.DYNAMIC_CALL, msg) &&
         occursin("call to nospecialize_child", msg) &&
-        occursin(r"\[1\] kernel", msg)
+        occursin("[1] kernel", msg)
     end
 end

 @testset "dynamic call (apply)" begin
-    func() = println(1)
+    mod = @eval module $(gensym())
+        export func
+        func() = println(1)
+    end

     @test_throws_message(InvalidIRError,
-                         Native.code_execution(func, Tuple{})) do msg
+                         Native.code_execution(mod.func, Tuple{})) do msg
         occursin("invalid LLVM IR", msg) &&
         occursin(GPUCompiler.DYNAMIC_CALL, msg) &&
         occursin("call to println", msg) &&
diff --git a/test/ptx_tests.jl b/test/ptx_tests.jl
index c059ba60..6caa6c71 100644
--- a/test/ptx_tests.jl
+++ b/test/ptx_tests.jl
@@ -27,11 +27,11 @@ end
     end

     ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{mod.Aggregate}))
-    @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\])\* ", ir) ||
-          occursin(r"@\w*kernel\w*\(ptr ", ir)
+    @test occursin(r"@julia_kernel\w*\(({ i64 }|\[1 x i64\])\* ", ir) ||
+          occursin(r"@julia_kernel\w*\(ptr ", ir)

     ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{mod.Aggregate}; kernel=true))
-    @test occursin(r"@\w*kernel\w*\(.*({ i64 }|\[1 x i64\]) ", ir)
+    @test occursin(r"@_Z6kernel9Aggregate\(.*({ i64 }|\[1 x i64\]) ", ir)
 end

 @testset "property_annotations" begin
@@ -83,13 +83,16 @@ end

 @testset "kernel state" begin
     # state should be passed by value to kernel functions
-    kernel() = return
+    mod = @eval module $(gensym())
+        export kernel
+        kernel() = return
+    end

-    ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}))
-    @test occursin(r"@\w*kernel\w*\(\)", ir)
+    ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{}))
+    @test occursin(r"@julia_kernel\w*\(\)", ir)

-    ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; kernel=true))
-    @test occursin(r"@\w*kernel\w*\(\[1 x i64\] %state\)", ir)
+    ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{}; kernel=true))
+    @test occursin("@_Z6kernel([1 x i64] %state)", ir)

     # state should only passed to device functions that use it
@@ -111,13 +114,13 @@ end
                                        kernel=true, dump_module=true))

     # kernel should take state argument before all else
-    @test occursin(r"@\w*kernel\w*\(\[1 x i64\] %state", ir)
+    @test occursin(r"@_Z6kernelP5Int64\(\[1 x i64\] %state", ir)

     # child1 doesn't use the state
-    @test occursin(r"@\w*child1\w*\((i64|i8\*|ptr)", ir)
+    @test occursin(r"@julia_child1\w*\((i64|i8\*|ptr)", ir)

     # child2 does
-    @test occursin(r"@\w*child2\w*\(\[1 x i64\] %state", ir)
+    @test occursin(r"@julia_child2\w*\(\[1 x i64\] %state", ir)

     # can't have the unlowered intrinsic
     @test !occursin("julia.gpu.state_getter", ir)
@@ -133,46 +136,58 @@ end
 @testset "child functions" begin
     # we often test using @noinline child functions, so test whether these survive
     # (despite not having side-effects)
-    @noinline child(i) = sink(i)
-    function parent(i)
-        child(i)
-        return
+
+    mod = @eval module $(gensym())
+        import ..sink
+        export child, parent
+
+        @noinline child(i) = sink(i)
+        function parent(i)
+            child(i)
+            return
+        end
     end

-    asm = sprint(io->PTX.code_native(io, parent, Tuple{Int64}))
-    @test occursin(r"call.uni\s+julia_.*child_"m, asm)
+    asm = sprint(io->PTX.code_native(io, mod.parent, Tuple{Int64}))
+    @test occursin(r"call.uni\s+julia_child_"m, asm)
 end

 @testset "kernel functions" begin
-    @noinline nonentry(i) = sink(i)
-    function entry(i)
-        nonentry(i)
-        return
+    mod = @eval module $(gensym())
+        import ..sink
+        export nonentry, entry
+
+        @noinline nonentry(i) = sink(i)
+        function entry(i)
+            nonentry(i)
+            return
+        end
     end

-    asm = sprint(io->PTX.code_native(io, entry, Tuple{Int64}; kernel=true))
-    @test occursin(r"\.visible \.entry \w*entry", asm)
-    @test !occursin(r"\.visible \.func \w*nonentry", asm)
-    @test occursin(r"\.func \w*nonentry", asm)
+    asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64};
+                                     kernel=true, dump_module=true))
+    @test occursin(".visible .entry _Z5entry5Int64", asm)
+    @test !occursin(".visible .func julia_nonentry", asm)
+    @test occursin(".func julia_nonentry", asm)

     @testset "property_annotations" begin
-        asm = sprint(io->PTX.code_native(io, entry, Tuple{Int64}; kernel=true))
+        asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; kernel=true))
         @test !occursin("maxntid", asm)

-        asm = sprint(io->PTX.code_native(io, entry, Tuple{Int64};
+        asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64};
                                          kernel=true, maxthreads=42))
         @test occursin(".maxntid 42, 1, 1", asm)

-        asm = sprint(io->PTX.code_native(io, entry, Tuple{Int64};
+        asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64};
                                          kernel=true, minthreads=42))
         @test occursin(".reqntid 42, 1, 1", asm)

-        asm = sprint(io->PTX.code_native(io, entry, Tuple{Int64};
+        asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64};
                                          kernel=true, blocks_per_sm=42))
         @test occursin(".minnctapersm 42", asm)

         if LLVM.version() >= v"4.0"
-            asm = sprint(io->PTX.code_native(io, entry, Tuple{Int64};
+            asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64};
                                              kernel=true, maxregs=42))
             @test occursin(".maxnreg 42", asm)
         end
@@ -183,44 +198,55 @@ end

     # bug: depending on a child function from multiple parents resulted in
     # the child only being present once
-    @noinline child(i) = sink(i)
-    function parent1(i)
-        child(i)
-        return
-    end
-
-    asm = sprint(io->PTX.code_native(io, parent1, Tuple{Int}))
-    @test occursin(r".func \w*child_", asm)
+    mod = @eval module $(gensym())
+        import ..sink
+        export child, parent1, parent2

-    function parent2(i)
-        child(i+1)
-        return
+        @noinline child(i) = sink(i)
+        function parent1(i)
+            child(i)
+            return
+        end
+        function parent2(i)
+            child(i+1)
+            return
+        end
     end

-    asm = sprint(io->PTX.code_native(io, parent2, Tuple{Int}))
-    @test occursin(r".func \w*child_", asm)
+    asm = sprint(io->PTX.code_native(io, mod.parent1, Tuple{Int}))
+    @test occursin(".func julia_child_", asm)
+
+    asm = sprint(io->PTX.code_native(io, mod.parent2, Tuple{Int}))
+    @test occursin(".func julia_child_", asm)
 end

 @testset "child function reuse bis" begin
     # bug: similar, but slightly different issue as above
     # in the case of two child functions
-    @noinline child1(i) = sink(i)
-    @noinline child2(i) = sink(i+1)
-    function parent1(i)
-        child1(i) + child2(i)
-        return
-    end
-    asm = sprint(io->PTX.code_native(io, parent1, Tuple{Int}))
-    @test occursin(r".func \w*child1_", asm)
-    @test occursin(r".func \w*child2_", asm)
-    function parent2(i)
-        child1(i+1) + child2(i+1)
-        return
+    mod = @eval module $(gensym())
+        import ..sink
+        export parent1, parent2, child1, child2
+
+        @noinline child1(i) = sink(i)
+        @noinline child2(i) = sink(i+1)
+        function parent1(i)
+            child1(i) + child2(i)
+            return
+        end
+        function parent2(i)
+            child1(i+1) + child2(i+1)
+            return
+        end
     end

-    asm = sprint(io->PTX.code_native(io, parent2, Tuple{Int}))
-    @test occursin(r".func \w*child1_", asm)
-    @test occursin(r".func \w*child2_", asm)
+
+    asm = sprint(io->PTX.code_native(io, mod.parent1, Tuple{Int}))
+    @test occursin(".func julia_child1_", asm)
+    @test occursin(".func julia_child2_", asm)
+
+    asm = sprint(io->PTX.code_native(io, mod.parent2, Tuple{Int}))
+    @test occursin(".func julia_child1_", asm)
+    @test occursin(".func julia_child2_", asm)
 end

 @testset "indirect sysimg function use" begin
@@ -261,6 +287,8 @@ end

 @testset "GC and TLS lowering" begin
     mod = @eval module $(gensym())
+        import ..sink
+
         mutable struct PleaseAllocate
             y::Csize_t
         end
diff --git a/test/spirv_tests.jl b/test/spirv_tests.jl
index c2903994..1774e0ae 100644
--- a/test/spirv_tests.jl
+++ b/test/spirv_tests.jl
@@ -19,13 +19,16 @@ using SPIRV_LLVM_Translator_unified_jll, SPIRV_Tools_jll
 end

 @testset "byval workaround" begin
-    kernel(x) = return
+    mod = @eval module $(gensym())
+        export kernel
+        kernel(x) = return
+    end

-    ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{Tuple{Int}}))
+    ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Tuple{Int}}))
     @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\])\*", ir) ||
           occursin(r"@\w*kernel\w*\(ptr", ir)

-    ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{Tuple{Int}}; kernel=true))
+    ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Tuple{Int}}; kernel=true))
     @test occursin(r"@\w*kernel\w*\(.*{ ({ i64 }|\[1 x i64\]) }\*.+byval", ir) ||
           occursin(r"@\w*kernel\w*\(ptr byval", ir)
 end
@@ -39,34 +42,37 @@ end
 end

 @testset "unsupported type detection" begin
-    function kernel(ptr, val)
-        unsafe_store!(ptr, val)
-        return
+    mod = @eval module $(gensym())
+        export kernel
+        function kernel(ptr, val)
+            unsafe_store!(ptr, val)
+            return
+        end
     end

-    ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{Ptr{Float16}, Float16}; validate=true))
+    ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float16}, Float16}; validate=true))
     @test occursin("store half", ir)

-    ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{Ptr{Float32}, Float32}; validate=true))
+    ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float32}, Float32}; validate=true))
     @test occursin("store float", ir)

-    ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{Ptr{Float64}, Float64}; validate=true))
+    ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float64}, Float64}; validate=true))
     @test occursin("store double", ir)

     @test_throws_message(InvalidIRError,
-                         SPIRV.code_llvm(devnull, kernel, Tuple{Ptr{Float16}, Float16};
+                         SPIRV.code_llvm(devnull, mod.kernel, Tuple{Ptr{Float16}, Float16};
                                          supports_fp16=false, validate=true)) do msg
         occursin("unsupported use of half value", msg) &&
         occursin("[1] unsafe_store!", msg) &&
-        occursin(r"\[2\] .*kernel", msg)
+        occursin("[2] kernel", msg)
     end

     @test_throws_message(InvalidIRError,
-                         SPIRV.code_llvm(devnull, kernel, Tuple{Ptr{Float64}, Float64};
+                         SPIRV.code_llvm(devnull, mod.kernel, Tuple{Ptr{Float64}, Float64};
                                          supports_fp64=false, validate=true)) do msg
         occursin("unsupported use of double value", msg) &&
         occursin("[1] unsafe_store!", msg) &&
-        occursin(r"\[2\] .*kernel", msg)
+        occursin("[2] kernel", msg)
     end
 end
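Note on the recurring test-suite pattern above: nearly every touched testset now evaluates its kernels into a freshly generated, anonymously named module instead of defining them directly in the testset. Below is a minimal, self-contained sketch of that pattern; the `sink` helper here is a hypothetical stand-in for the test suite's optimization barrier, not its actual definition.

    # Each testset gets its own anonymous module, so identically named
    # kernels defined by other testsets (or by earlier runs of the same
    # file) cannot collide in the global method table or in compilation
    # caches keyed by the defining module.
    mod = @eval module $(gensym())
        export kernel

        @noinline sink(i) = i  # hypothetical stand-in for the suite's helper

        function kernel(i)
            sink(i)
            return
        end
    end

    # callers reference the function through the module object
    mod.kernel(42)

This is also why several test modules gain an explicit `import ..sink`: code evaluated into a fresh module no longer sees the enclosing test module's bindings implicitly.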
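A second pattern worth noting is in the src/ changes: behavior is gated on `VERSION` against a specific `-DEV` build number, so the switch happens at exactly the nightly that changed the runtime. Because the check runs at macro-expansion (load) time, the obsolete path is never even emitted on newer Julia. A simplified sketch follows, reusing the threshold and C entry points from the diff but wrapping an arbitrary expression, rather than rewriting a function definition via `splitdef` as the real `@locked` does:

    macro locked_sketch(ex)
        if VERSION >= v"1.12.0-DEV.769"
            # newer Julia: locking is handled by the engine (given a correct
            # cache owner token), so expand to the bare expression
            return esc(ex)
        end
        # older Julia: bracket the expression with the type-inference lock
        quote
            ccall(:jl_typeinf_lock_begin, Cvoid, ())
            try
                $(esc(ex))
            finally
                ccall(:jl_typeinf_lock_end, Cvoid, ())
            end
        end
    end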