From 2c6b1112e52e4263a36ac397313097caf3be7d0f Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 14:08:27 +0100 Subject: [PATCH 01/13] Move algorithm dispatch to CPU --- src/rand_binomial.jl | 47 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/rand_binomial.jl b/src/rand_binomial.jl index 509129c..49ecbce 100644 --- a/src/rand_binomial.jl +++ b/src/rand_binomial.jl @@ -65,20 +65,57 @@ end ## constant (scalar) parameters function rand_binom!(rng, A::BinomialArray, count::Integer, prob::AbstractFloat) - kernel = @cuda launch=false kernel_BTRS_scalar!( - A, count, Float32(prob), rng.seed, rng.counter - ) + n = count + + # edge cases + if prob <= 0 || n <= 0 + A .= 0 + return A + elseif prob >= 1 + A .= n + return A + end + + invert = prob > 0.5f0 + @show invert + if invert + p = 1 - prob + else + p = prob + end + + # Use naive algorithm for n <= 17 + if n <= 17 + kernel = @cuda launch=false kernel_naive_scalar!( + A, n, Float32(p), rng.seed, rng.counter + ) + # Use inversion algorithm for n*p < 10 + elseif n * p < 10f0 + kernel = @cuda launch=false kernel_inversion_scalar!( + A, n, Float32(p), rng.seed, rng.counter + ) + # BTRS algorithm + else + kernel = @cuda launch=false kernel_BTRS_scalar!( + A, n, Float32(p), rng.seed, rng.counter + ) + end + config = launch_configuration(kernel.fun) threads = max(32, min(config.threads, length(A))) blocks = min(config.blocks, cld(length(A), threads)) - kernel(A, count, Float32(prob), rng.seed, rng.counter; threads=threads, blocks=blocks) + kernel(A, n, Float32(p), rng.seed, rng.counter; threads=threads, blocks=blocks) new_counter = Int64(rng.counter) + length(A) overflow, remainder = fldmod(new_counter, typemax(UInt32)) rng.seed += overflow # XXX: is this OK? rng.counter = remainder - return A + if invert + return n .- A + else + return A + end end ## arrays of parameters From 34b3e99a30fcc51e09a76a3bba50ca50b846104f Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 14:08:40 +0100 Subject: [PATCH 02/13] Implement separate kernels --- src/kernels.jl | 63 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/src/kernels.jl b/src/kernels.jl index cb770a9..ac2a5f2 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -33,7 +33,68 @@ end # BTRS algorithm, adapted from the tensorflow library (https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/random_binomial_op.cc) -## Kernel for scalar parameters +## Kernels for scalar parameters +function kernel_naive_scalar!(A, n, p, seed::UInt32, counter::UInt32) + device_rng = Random.default_rng() + + # initialize the state + @inbounds Random.seed!(device_rng, seed, counter) + + # grid-stride loop + tid = threadIdx().x + window = (blockDim().x - 1i32) * gridDim().x + offset = (blockIdx().x - 1i32) * blockDim().x + + k = 0 + while offset < length(A) + i = tid + offset + + k = 0 + ctr = 1 + while ctr <= n + rand(Float32) < p && (k += 1) + ctr += 1 + end + + if i <= length(A) + @inbounds A[i] = k + end + offset += window + end + return nothing +end +function kernel_inversion_scalar!(A, n, p, seed::UInt32, counter::UInt32) + device_rng = Random.default_rng() + + # initialize the state + @inbounds Random.seed!(device_rng, seed, counter) + + # grid-stride loop + tid = threadIdx().x + window = (blockDim().x - 1i32) * gridDim().x + offset = (blockIdx().x - 1i32) * blockDim().x + + k = 0 + while offset < length(A) + i = tid + offset + + logp = CUDA.log(1f0-p) + geom_sum = 0f0 + k = 0 + while true + geom = ceil(CUDA.log(rand(Float32)) / logp) + geom_sum += geom + geom_sum > n && break + k += 1 + end + + if i <= length(A) + @inbounds A[i] = k + end + offset += window + end + return nothing +end function kernel_BTRS_scalar!(A, n, p, seed::UInt32, counter::UInt32) device_rng = Random.default_rng() From fbf085cc25094b1d7c026c96c33516cb1ccb8b32 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 14:53:14 +0100 Subject: [PATCH 03/13] Remove @show statement --- src/rand_binomial.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rand_binomial.jl b/src/rand_binomial.jl index 49ecbce..09e1641 100644 --- a/src/rand_binomial.jl +++ b/src/rand_binomial.jl @@ -77,7 +77,6 @@ function rand_binom!(rng, A::BinomialArray, count::Integer, prob::AbstractFloat) end invert = prob > 0.5f0 - @show invert if invert p = 1 - prob else From 8cdb41a79854a02fdad2022255246d550c8cc2b5 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 18:20:50 +0100 Subject: [PATCH 04/13] Clean up kernels --- src/kernels.jl | 103 ++++++++++++++++--------------------------------- 1 file changed, 34 insertions(+), 69 deletions(-) diff --git a/src/kernels.jl b/src/kernels.jl index ac2a5f2..15995dc 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -45,7 +45,6 @@ function kernel_naive_scalar!(A, n, p, seed::UInt32, counter::UInt32) window = (blockDim().x - 1i32) * gridDim().x offset = (blockIdx().x - 1i32) * blockDim().x - k = 0 while offset < length(A) i = tid + offset @@ -74,7 +73,6 @@ function kernel_inversion_scalar!(A, n, p, seed::UInt32, counter::UInt32) window = (blockDim().x - 1i32) * gridDim().x offset = (blockIdx().x - 1i32) * blockDim().x - k = 0 while offset < length(A) i = tid + offset @@ -110,80 +108,47 @@ function kernel_BTRS_scalar!(A, n, p, seed::UInt32, counter::UInt32) while offset < length(A) i = tid + offset - # edge cases - if p <= 0 || n <= 0 - k = 0 - elseif p >= 1 - k = n - # Use naive algorithm for n <= 17 - elseif n <= 17 - k = 0 - ctr = 1 - while ctr <= n - rand(Float32) < p && (k += 1) - ctr += 1 - end - # Use inversion algorithm for n*p < 10 - elseif n * p < 10f0 - logp = CUDA.log(1f0-p) - geom_sum = 0f0 - k = 0 - while true - geom = ceil(CUDA.log(rand(Float32)) / logp) - geom_sum += geom - geom_sum > n && break - k += 1 + r = p/(1f0-p) + s = p*(1f0-p) + + stddev = sqrt(n * s) + b = 1.15f0 + 2.53f0 * stddev + a = -0.0873f0 + 0.0248f0 * b + 0.01f0 * p + c = n * p + 0.5f0 + v_r = 0.92f0 - 4.2f0 / b + + alpha = (2.83f0 + 5.1f0 / b) * stddev; + m = floor((n + 1) * p) + + ks = 0f0 + + while true + usample = rand(Float32) - 0.5f0 + vsample = rand(Float32) + + us = 0.5f0 - abs(usample) + ks = floor((2 * a / us + b) * usample + c) + + if us >= 0.07f0 && vsample <= v_r + break end - # BTRS algorithm - else - # BTRS approximations work well for p <= 0.5 - # invert p and set `invert` flag - (invert = p > 0.5f0) && (p = 1f0 - p) - r = p/(1f0-p) - s = p*(1f0-p) - - stddev = sqrt(n * s) - b = 1.15f0 + 2.53f0 * stddev - a = -0.0873f0 + 0.0248f0 * b + 0.01f0 * p - c = n * p + 0.5f0 - v_r = 0.92f0 - 4.2f0 / b - - alpha = (2.83f0 + 5.1f0 / b) * stddev; - m = floor((n + 1) * p) + if ks < 0 || ks > n + continue + end - ks = 0f0 - - while true - usample = rand(Float32) - 0.5f0 - vsample = rand(Float32) - - us = 0.5f0 - abs(usample) - ks = floor((2 * a / us + b) * usample + c) - - if us >= 0.07f0 && vsample <= v_r - break - end - - if ks < 0 || ks > n - continue - end - - v2 = CUDA.log(vsample * alpha / (a / (us * us) + b)) - ub = (m + 0.5f0) * CUDA.log((m + 1) / (r * (n - m + 1))) + - (n + 1) * CUDA.log((n - m + 1) / (n - ks + 1)) + - (ks + 0.5f0) * CUDA.log(r * (n - ks + 1) / (ks + 1)) + - stirling_approx_tail(m) + stirling_approx_tail(n - m) - stirling_approx_tail(ks) - stirling_approx_tail(n - ks) - if v2 <= ub - break - end + v2 = CUDA.log(vsample * alpha / (a / (us * us) + b)) + ub = (m + 0.5f0) * CUDA.log((m + 1) / (r * (n - m + 1))) + + (n + 1) * CUDA.log((n - m + 1) / (n - ks + 1)) + + (ks + 0.5f0) * CUDA.log(r * (n - ks + 1) / (ks + 1)) + + stirling_approx_tail(m) + stirling_approx_tail(n - m) - stirling_approx_tail(ks) - stirling_approx_tail(n - ks) + if v2 <= ub + break end - invert && (ks = n - ks) - k = Int(ks) end if i <= length(A) - @inbounds A[i] = k + @inbounds A[i] = ks end offset += window end From b41291bb847bdf6150d116caf249b20b755f9816 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 18:28:18 +0100 Subject: [PATCH 05/13] Add distributional tests --- test/Project.toml | 2 ++ test/runtests.jl | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/test/Project.toml b/test/Project.toml index faeac78..ae5622a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,6 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index 1af5c53..605fb48 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,7 @@ using BinomialGPU using CUDA +using Distributions +using Statistics using BenchmarkTools using Test @@ -179,4 +181,39 @@ using Test end end end # out-of-place + + @testset "Distributional tests" begin + function mean_var_CI(m, S2, n, p, N, α) + truemean = n*p + truevar = n*p*(1-p) + a = quantile(Normal(), α/2) + b = quantile(Normal(), 1-α/2) + c = quantile(Chisq(N-1), α/2) + d = quantile(Chisq(N-1), 1-α/2) + @test a <= (m - truemean)/sqrt(N*truevar) <= b + @test c <= (N-1)*S2/truevar <= d + end + @testset "Scalar parameters" begin + function test_mean_variance(N, n, p) + CUDA.@sync A = rand_binomial(N, count = n, prob = p) + mean_var_CI(mean(A), var(A), n, p, N, 1e-3) + end + N = 2^20 + @testset "n = $n, p = $p" for n in [1, 10, 20, 50, 100, 200, 500, 1000], + p in 0.1:0.1:0.9 + test_mean_variance(N, n, p) + end + end + @testset "Arrays of parameters" begin + function test_mean_variance(N, n, p) + CUDA.@sync A = rand_binomial(N, count = fill(n, N), prob = fill(p, N)) + mean_var_CI(mean(A), var(A), n, p, N, 1e-3) + end + N = 2^20 + @testset "n = $n, p = $p" for n in [1, 10, 20, 50, 100, 200, 500, 1000], + p in 0.1:0.1:0.9 + test_mean_variance(N, n, p) + end + end + end # Distributional tests end From bc7ca1ac72df004acef2ac787df7ebba2e536035 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 18:41:44 +0100 Subject: [PATCH 06/13] Fix full kernel, tighten distributional tests --- src/kernels.jl | 14 ++++++++------ test/runtests.jl | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/kernels.jl b/src/kernels.jl index 15995dc..b09510e 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -191,6 +191,9 @@ function kernel_BTRS!( @inbounds n = count[I1] @inbounds p = prob[CartesianIndex(I1, I2)] end + # BTRS approximations work well for p <= 0.5 + # invert p and set `invert` flag + (invert = p > 0.5f0) && (p = 1-p) else n = 0 p = 0f0 @@ -223,10 +226,6 @@ function kernel_BTRS!( end # BTRS algorithm else - # BTRS approximations work well for p <= 0.5 - # invert p and set `invert` flag - (invert = p > 0.5f0) && (p = 1f0 - p) - r = p/(1f0-p) s = p*(1f0-p) @@ -265,12 +264,15 @@ function kernel_BTRS!( break end end - invert && (ks = n - ks) k = Int(ks) end if i <= length(A) - @inbounds A[i] = k + if invert + @inbounds A[i] = k + else + @inbounds A[i] = n - k + end end offset += window end diff --git a/test/runtests.jl b/test/runtests.jl index 605fb48..65ba90e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -196,7 +196,7 @@ using Test @testset "Scalar parameters" begin function test_mean_variance(N, n, p) CUDA.@sync A = rand_binomial(N, count = n, prob = p) - mean_var_CI(mean(A), var(A), n, p, N, 1e-3) + mean_var_CI(mean(A), var(A), n, p, N, 1e-5) end N = 2^20 @testset "n = $n, p = $p" for n in [1, 10, 20, 50, 100, 200, 500, 1000], @@ -207,7 +207,7 @@ using Test @testset "Arrays of parameters" begin function test_mean_variance(N, n, p) CUDA.@sync A = rand_binomial(N, count = fill(n, N), prob = fill(p, N)) - mean_var_CI(mean(A), var(A), n, p, N, 1e-3) + mean_var_CI(mean(A), var(A), n, p, N, 1e-5) end N = 2^20 @testset "n = $n, p = $p" for n in [1, 10, 20, 50, 100, 200, 500, 1000], From c7a1dd28b58911e3e9d40d3ef09c43b05335d750 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 19:42:27 +0100 Subject: [PATCH 07/13] Correct mistake in test --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 65ba90e..912c53d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -190,7 +190,7 @@ using Test b = quantile(Normal(), 1-α/2) c = quantile(Chisq(N-1), α/2) d = quantile(Chisq(N-1), 1-α/2) - @test a <= (m - truemean)/sqrt(N*truevar) <= b + @test a <= sqrt(N)*(m - truemean)/sqrt(truevar) <= b @test c <= (N-1)*S2/truevar <= d end @testset "Scalar parameters" begin From 0c93ddf006823e85933672116bf3b08551d9650b Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 19:44:52 +0100 Subject: [PATCH 08/13] Hard to get it right --- test/runtests.jl | 350 +++++++++++++++++++++++------------------------ 1 file changed, 175 insertions(+), 175 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 912c53d..411e80c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,180 +7,180 @@ using BenchmarkTools using Test @testset "BinomialGPU.jl" begin - @testset "in-place" begin - @testset "scalar parameters" begin - n = 128 - p = 0.5 - - @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] - A = CUDA.zeros(Int, Tuple(Adims)) - @test rand_binomial!(A, count = n, prob = p) isa CuArray{Int} - @test minimum(rand_binomial!(A, count = n, prob = p)) >= 0 - @test maximum(rand_binomial!(A, count = n, prob = p)) <= n - end - end - - @testset "parameter arrays" begin - @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] - A = CUDA.zeros(Int, Tuple(Adims)) - @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) - pdim = Adims[1:j] - n = 128 - ps = CUDA.rand(pdim...) - @test rand_binomial!(A, count = n, prob = ps) isa CuArray{Int} - @test minimum(rand_binomial!(A, count = n, prob = ps)) >= 0 - @test minimum(n .- rand_binomial!(A, count = n, prob = ps)) >= 0 - end - - @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) - ndim = Adims[1:i] - ns = CUDA.fill(128, Tuple(ndim)) - p = 0.5 - @test rand_binomial!(A, count = ns, prob = p) isa CuArray{Int} - @test minimum(rand_binomial!(A, count = ns, prob = p)) >= 0 - @test minimum(ns .- rand_binomial!(A, count = ns, prob = p)) >= 0 - end - - @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) - ndim = Adims[1:i] - pdim = Adims[1:j] - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - n = 128 - p = 0.5 - @test rand_binomial!(A, count = ns, prob = ps) isa CuArray{Int} - @test minimum(rand_binomial!(A, count = ns, prob = ps)) >= 0 - @test minimum(ns .- rand_binomial!(A, count = ns, prob = ps)) >= 0 - - - # wrong size in the last dimension - for k in 1:i, l in 1:j - ndim[k] += 1 - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - ndim[k] -= 1 - pdim[l] += 1 - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - pdim[l] -= 1 - end - - # wrong number of dimensions - if i == length(Adims) - push!(ndim, 32) - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - pop!(ndim) - end - if j == length(Adims) - push!(pdim, 32) - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - pop!(pdim) - end - end - end - end - - @testset "bad parameter values" begin - # bad parameter values default - A = CUDA.zeros(Int, 256) - @test rand_binomial!(A, count = -1, prob = 0.5) == CUDA.zeros(256) # negative counts are equivalent to zero - @test rand_binomial!(A, count = 2, prob = -0.1) == CUDA.zeros(256) # negative probabilities are equivalent to zero - @test rand_binomial!(A, count = 2, prob = 1.5) == CUDA.fill(2, 256) # probabilities greater than 1 are equivalent to 1 - @test_throws MethodError rand_binomial!(A, count = 5., prob = 0.5) # non-integer counts throw an error - end - - @testset "benchmarks" begin - # benchmarks - A = CUDA.zeros(Int, 1024, 1024) - n = 128 - p = 0.5 - ns = CUDA.fill(128, (1024, 1024)) - ps = CUDA.rand(1024, 1024) - println("") - println("Benchmarking constant parameter array: should run in less than 2ms on an RTX20xx card") - display(@benchmark CUDA.@sync rand_binomial!($A, count = $n, prob = $p)) - println("") - println("Benchmarking full parameter array: should run in less than 2ms on an RTX20xx card") - display(@benchmark CUDA.@sync rand_binomial!($A, count = $ns, prob = $ps)) - println("") - end - end # in-place - - @testset "out-of-place" begin - @testset "scalar parameters" begin - A = rand_binomial(count = 10, prob = 0.5) - @test size(A) == (1,) - @test A isa CuVector{Int} - - A = rand_binomial(10, count = 10, prob = 0.5) - @test size(A) == (10,) - @test A isa CuVector{Int} - - A = rand_binomial(16, 32, count = 10, prob = 0.5) - @test size(A) == (16, 32) - @test A isa CuMatrix{Int} - - A = rand_binomial(2, 16, 32, count = 10, prob = 0.5) - @test size(A) == (2, 16, 32) - @test A isa CuArray{Int, 3} - end - @testset "parameter arrays" begin - @testset "array of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] - @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) - pdim = Adims[1:j] - n = 128 - ps = CUDA.rand(pdim...) - A = rand_binomial(count = n, prob = ps) - @test size(A) == Tuple(pdim) - end - - @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) - ndim = Adims[1:i] - ns = CUDA.fill(128, Tuple(ndim)) - p = 0.5 - A = rand_binomial(count = ns, prob = p) - @test size(A) == Tuple(ndim) - end - - @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) - ndim = Adims[1:i] - pdim = Adims[1:j] - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - A = rand_binomial(count = ns, prob = ps) - if length(ndim) > length(pdim) - @test size(A) == Tuple(ndim) - else - @test size(A) == Tuple(pdim) - end - - # wrong size in the last dimension - for k in 1:i, l in 1:j - if i <= j - ndim[k] += 1 - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) - ndim[k] -= 1 - end - if i >= j - pdim[l] += 1 - ns = CUDA.fill(128, Tuple(ndim)) - ps = CUDA.rand(pdim...) - @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) - pdim[l] -= 1 - end - end - end - end - end - end # out-of-place + # @testset "in-place" begin + # @testset "scalar parameters" begin + # n = 128 + # p = 0.5 + + # @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] + # A = CUDA.zeros(Int, Tuple(Adims)) + # @test rand_binomial!(A, count = n, prob = p) isa CuArray{Int} + # @test minimum(rand_binomial!(A, count = n, prob = p)) >= 0 + # @test maximum(rand_binomial!(A, count = n, prob = p)) <= n + # end + # end + + # @testset "parameter arrays" begin + # @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] + # A = CUDA.zeros(Int, Tuple(Adims)) + # @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) + # pdim = Adims[1:j] + # n = 128 + # ps = CUDA.rand(pdim...) + # @test rand_binomial!(A, count = n, prob = ps) isa CuArray{Int} + # @test minimum(rand_binomial!(A, count = n, prob = ps)) >= 0 + # @test minimum(n .- rand_binomial!(A, count = n, prob = ps)) >= 0 + # end + + # @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) + # ndim = Adims[1:i] + # ns = CUDA.fill(128, Tuple(ndim)) + # p = 0.5 + # @test rand_binomial!(A, count = ns, prob = p) isa CuArray{Int} + # @test minimum(rand_binomial!(A, count = ns, prob = p)) >= 0 + # @test minimum(ns .- rand_binomial!(A, count = ns, prob = p)) >= 0 + # end + + # @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) + # ndim = Adims[1:i] + # pdim = Adims[1:j] + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # n = 128 + # p = 0.5 + # @test rand_binomial!(A, count = ns, prob = ps) isa CuArray{Int} + # @test minimum(rand_binomial!(A, count = ns, prob = ps)) >= 0 + # @test minimum(ns .- rand_binomial!(A, count = ns, prob = ps)) >= 0 + + + # # wrong size in the last dimension + # for k in 1:i, l in 1:j + # ndim[k] += 1 + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + # ndim[k] -= 1 + # pdim[l] += 1 + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + # pdim[l] -= 1 + # end + + # # wrong number of dimensions + # if i == length(Adims) + # push!(ndim, 32) + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + # pop!(ndim) + # end + # if j == length(Adims) + # push!(pdim, 32) + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + # pop!(pdim) + # end + # end + # end + # end + + # @testset "bad parameter values" begin + # # bad parameter values default + # A = CUDA.zeros(Int, 256) + # @test rand_binomial!(A, count = -1, prob = 0.5) == CUDA.zeros(256) # negative counts are equivalent to zero + # @test rand_binomial!(A, count = 2, prob = -0.1) == CUDA.zeros(256) # negative probabilities are equivalent to zero + # @test rand_binomial!(A, count = 2, prob = 1.5) == CUDA.fill(2, 256) # probabilities greater than 1 are equivalent to 1 + # @test_throws MethodError rand_binomial!(A, count = 5., prob = 0.5) # non-integer counts throw an error + # end + + # @testset "benchmarks" begin + # # benchmarks + # A = CUDA.zeros(Int, 1024, 1024) + # n = 128 + # p = 0.5 + # ns = CUDA.fill(128, (1024, 1024)) + # ps = CUDA.rand(1024, 1024) + # println("") + # println("Benchmarking constant parameter array: should run in less than 2ms on an RTX20xx card") + # display(@benchmark CUDA.@sync rand_binomial!($A, count = $n, prob = $p)) + # println("") + # println("Benchmarking full parameter array: should run in less than 2ms on an RTX20xx card") + # display(@benchmark CUDA.@sync rand_binomial!($A, count = $ns, prob = $ps)) + # println("") + # end + # end # in-place + + # @testset "out-of-place" begin + # @testset "scalar parameters" begin + # A = rand_binomial(count = 10, prob = 0.5) + # @test size(A) == (1,) + # @test A isa CuVector{Int} + + # A = rand_binomial(10, count = 10, prob = 0.5) + # @test size(A) == (10,) + # @test A isa CuVector{Int} + + # A = rand_binomial(16, 32, count = 10, prob = 0.5) + # @test size(A) == (16, 32) + # @test A isa CuMatrix{Int} + + # A = rand_binomial(2, 16, 32, count = 10, prob = 0.5) + # @test size(A) == (2, 16, 32) + # @test A isa CuArray{Int, 3} + # end + # @testset "parameter arrays" begin + # @testset "array of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] + # @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) + # pdim = Adims[1:j] + # n = 128 + # ps = CUDA.rand(pdim...) + # A = rand_binomial(count = n, prob = ps) + # @test size(A) == Tuple(pdim) + # end + + # @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) + # ndim = Adims[1:i] + # ns = CUDA.fill(128, Tuple(ndim)) + # p = 0.5 + # A = rand_binomial(count = ns, prob = p) + # @test size(A) == Tuple(ndim) + # end + + # @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) + # ndim = Adims[1:i] + # pdim = Adims[1:j] + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # A = rand_binomial(count = ns, prob = ps) + # if length(ndim) > length(pdim) + # @test size(A) == Tuple(ndim) + # else + # @test size(A) == Tuple(pdim) + # end + + # # wrong size in the last dimension + # for k in 1:i, l in 1:j + # if i <= j + # ndim[k] += 1 + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) + # ndim[k] -= 1 + # end + # if i >= j + # pdim[l] += 1 + # ns = CUDA.fill(128, Tuple(ndim)) + # ps = CUDA.rand(pdim...) + # @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) + # pdim[l] -= 1 + # end + # end + # end + # end + # end + # end # out-of-place @testset "Distributional tests" begin function mean_var_CI(m, S2, n, p, N, α) @@ -190,7 +190,7 @@ using Test b = quantile(Normal(), 1-α/2) c = quantile(Chisq(N-1), α/2) d = quantile(Chisq(N-1), 1-α/2) - @test a <= sqrt(N)*(m - truemean)/sqrt(truevar) <= b + @test sqrt(truevar/N)*a <= m - truemean <= sqrt(truevar/N)*b @test c <= (N-1)*S2/truevar <= d end @testset "Scalar parameters" begin From 85f5e9ba4c74593c2e4fbd3f8a32fc9153537a78 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 21:24:58 +0100 Subject: [PATCH 09/13] Uncomment tests --- test/runtests.jl | 348 +++++++++++++++++++++++------------------------ 1 file changed, 174 insertions(+), 174 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 411e80c..970539c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,180 +7,180 @@ using BenchmarkTools using Test @testset "BinomialGPU.jl" begin - # @testset "in-place" begin - # @testset "scalar parameters" begin - # n = 128 - # p = 0.5 - - # @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] - # A = CUDA.zeros(Int, Tuple(Adims)) - # @test rand_binomial!(A, count = n, prob = p) isa CuArray{Int} - # @test minimum(rand_binomial!(A, count = n, prob = p)) >= 0 - # @test maximum(rand_binomial!(A, count = n, prob = p)) <= n - # end - # end - - # @testset "parameter arrays" begin - # @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] - # A = CUDA.zeros(Int, Tuple(Adims)) - # @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) - # pdim = Adims[1:j] - # n = 128 - # ps = CUDA.rand(pdim...) - # @test rand_binomial!(A, count = n, prob = ps) isa CuArray{Int} - # @test minimum(rand_binomial!(A, count = n, prob = ps)) >= 0 - # @test minimum(n .- rand_binomial!(A, count = n, prob = ps)) >= 0 - # end - - # @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) - # ndim = Adims[1:i] - # ns = CUDA.fill(128, Tuple(ndim)) - # p = 0.5 - # @test rand_binomial!(A, count = ns, prob = p) isa CuArray{Int} - # @test minimum(rand_binomial!(A, count = ns, prob = p)) >= 0 - # @test minimum(ns .- rand_binomial!(A, count = ns, prob = p)) >= 0 - # end - - # @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) - # ndim = Adims[1:i] - # pdim = Adims[1:j] - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # n = 128 - # p = 0.5 - # @test rand_binomial!(A, count = ns, prob = ps) isa CuArray{Int} - # @test minimum(rand_binomial!(A, count = ns, prob = ps)) >= 0 - # @test minimum(ns .- rand_binomial!(A, count = ns, prob = ps)) >= 0 - - - # # wrong size in the last dimension - # for k in 1:i, l in 1:j - # ndim[k] += 1 - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - # ndim[k] -= 1 - # pdim[l] += 1 - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - # pdim[l] -= 1 - # end - - # # wrong number of dimensions - # if i == length(Adims) - # push!(ndim, 32) - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - # pop!(ndim) - # end - # if j == length(Adims) - # push!(pdim, 32) - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) - # pop!(pdim) - # end - # end - # end - # end - - # @testset "bad parameter values" begin - # # bad parameter values default - # A = CUDA.zeros(Int, 256) - # @test rand_binomial!(A, count = -1, prob = 0.5) == CUDA.zeros(256) # negative counts are equivalent to zero - # @test rand_binomial!(A, count = 2, prob = -0.1) == CUDA.zeros(256) # negative probabilities are equivalent to zero - # @test rand_binomial!(A, count = 2, prob = 1.5) == CUDA.fill(2, 256) # probabilities greater than 1 are equivalent to 1 - # @test_throws MethodError rand_binomial!(A, count = 5., prob = 0.5) # non-integer counts throw an error - # end - - # @testset "benchmarks" begin - # # benchmarks - # A = CUDA.zeros(Int, 1024, 1024) - # n = 128 - # p = 0.5 - # ns = CUDA.fill(128, (1024, 1024)) - # ps = CUDA.rand(1024, 1024) - # println("") - # println("Benchmarking constant parameter array: should run in less than 2ms on an RTX20xx card") - # display(@benchmark CUDA.@sync rand_binomial!($A, count = $n, prob = $p)) - # println("") - # println("Benchmarking full parameter array: should run in less than 2ms on an RTX20xx card") - # display(@benchmark CUDA.@sync rand_binomial!($A, count = $ns, prob = $ps)) - # println("") - # end - # end # in-place - - # @testset "out-of-place" begin - # @testset "scalar parameters" begin - # A = rand_binomial(count = 10, prob = 0.5) - # @test size(A) == (1,) - # @test A isa CuVector{Int} - - # A = rand_binomial(10, count = 10, prob = 0.5) - # @test size(A) == (10,) - # @test A isa CuVector{Int} - - # A = rand_binomial(16, 32, count = 10, prob = 0.5) - # @test size(A) == (16, 32) - # @test A isa CuMatrix{Int} - - # A = rand_binomial(2, 16, 32, count = 10, prob = 0.5) - # @test size(A) == (2, 16, 32) - # @test A isa CuArray{Int, 3} - # end - # @testset "parameter arrays" begin - # @testset "array of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] - # @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) - # pdim = Adims[1:j] - # n = 128 - # ps = CUDA.rand(pdim...) - # A = rand_binomial(count = n, prob = ps) - # @test size(A) == Tuple(pdim) - # end - - # @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) - # ndim = Adims[1:i] - # ns = CUDA.fill(128, Tuple(ndim)) - # p = 0.5 - # A = rand_binomial(count = ns, prob = p) - # @test size(A) == Tuple(ndim) - # end - - # @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) - # ndim = Adims[1:i] - # pdim = Adims[1:j] - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # A = rand_binomial(count = ns, prob = ps) - # if length(ndim) > length(pdim) - # @test size(A) == Tuple(ndim) - # else - # @test size(A) == Tuple(pdim) - # end - - # # wrong size in the last dimension - # for k in 1:i, l in 1:j - # if i <= j - # ndim[k] += 1 - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) - # ndim[k] -= 1 - # end - # if i >= j - # pdim[l] += 1 - # ns = CUDA.fill(128, Tuple(ndim)) - # ps = CUDA.rand(pdim...) - # @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) - # pdim[l] -= 1 - # end - # end - # end - # end - # end - # end # out-of-place + @testset "in-place" begin + @testset "scalar parameters" begin + n = 128 + p = 0.5 + + @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] + A = CUDA.zeros(Int, Tuple(Adims)) + @test rand_binomial!(A, count = n, prob = p) isa CuArray{Int} + @test minimum(rand_binomial!(A, count = n, prob = p)) >= 0 + @test maximum(rand_binomial!(A, count = n, prob = p)) <= n + end + end + + @testset "parameter arrays" begin + @testset "A of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] + A = CUDA.zeros(Int, Tuple(Adims)) + @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) + pdim = Adims[1:j] + n = 128 + ps = CUDA.rand(pdim...) + @test rand_binomial!(A, count = n, prob = ps) isa CuArray{Int} + @test minimum(rand_binomial!(A, count = n, prob = ps)) >= 0 + @test minimum(n .- rand_binomial!(A, count = n, prob = ps)) >= 0 + end + + @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) + ndim = Adims[1:i] + ns = CUDA.fill(128, Tuple(ndim)) + p = 0.5 + @test rand_binomial!(A, count = ns, prob = p) isa CuArray{Int} + @test minimum(rand_binomial!(A, count = ns, prob = p)) >= 0 + @test minimum(ns .- rand_binomial!(A, count = ns, prob = p)) >= 0 + end + + @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) + ndim = Adims[1:i] + pdim = Adims[1:j] + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + n = 128 + p = 0.5 + @test rand_binomial!(A, count = ns, prob = ps) isa CuArray{Int} + @test minimum(rand_binomial!(A, count = ns, prob = ps)) >= 0 + @test minimum(ns .- rand_binomial!(A, count = ns, prob = ps)) >= 0 + + + # wrong size in the last dimension + for k in 1:i, l in 1:j + ndim[k] += 1 + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + ndim[k] -= 1 + pdim[l] += 1 + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + pdim[l] -= 1 + end + + # wrong number of dimensions + if i == length(Adims) + push!(ndim, 32) + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + pop!(ndim) + end + if j == length(Adims) + push!(pdim, 32) + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + @test_throws DimensionMismatch rand_binomial!(A, count = ns, prob = ps) + pop!(pdim) + end + end + end + end + + @testset "bad parameter values" begin + # bad parameter values default + A = CUDA.zeros(Int, 256) + @test rand_binomial!(A, count = -1, prob = 0.5) == CUDA.zeros(256) # negative counts are equivalent to zero + @test rand_binomial!(A, count = 2, prob = -0.1) == CUDA.zeros(256) # negative probabilities are equivalent to zero + @test rand_binomial!(A, count = 2, prob = 1.5) == CUDA.fill(2, 256) # probabilities greater than 1 are equivalent to 1 + @test_throws MethodError rand_binomial!(A, count = 5., prob = 0.5) # non-integer counts throw an error + end + + @testset "benchmarks" begin + # benchmarks + A = CUDA.zeros(Int, 1024, 1024) + n = 128 + p = 0.5 + ns = CUDA.fill(128, (1024, 1024)) + ps = CUDA.rand(1024, 1024) + println("") + println("Benchmarking constant parameter array: should run in less than 2ms on an RTX20xx card") + display(@benchmark CUDA.@sync rand_binomial!($A, count = $n, prob = $p)) + println("") + println("Benchmarking full parameter array: should run in less than 2ms on an RTX20xx card") + display(@benchmark CUDA.@sync rand_binomial!($A, count = $ns, prob = $ps)) + println("") + end + end # in-place + + @testset "out-of-place" begin + @testset "scalar parameters" begin + A = rand_binomial(count = 10, prob = 0.5) + @test size(A) == (1,) + @test A isa CuVector{Int} + + A = rand_binomial(10, count = 10, prob = 0.5) + @test size(A) == (10,) + @test A isa CuVector{Int} + + A = rand_binomial(16, 32, count = 10, prob = 0.5) + @test size(A) == (16, 32) + @test A isa CuMatrix{Int} + + A = rand_binomial(2, 16, 32, count = 10, prob = 0.5) + @test size(A) == (2, 16, 32) + @test A isa CuArray{Int, 3} + end + @testset "parameter arrays" begin + @testset "array of dim $(length(Adims))" for Adims in [[2,], [2, 4], [2, 4, 8], [2, 4, 8, 16]] + @testset "count of dim 0, prob of dim $j" for j in eachindex(Adims) + pdim = Adims[1:j] + n = 128 + ps = CUDA.rand(pdim...) + A = rand_binomial(count = n, prob = ps) + @test size(A) == Tuple(pdim) + end + + @testset "count of dim $i, prob of dim 0" for i in eachindex(Adims) + ndim = Adims[1:i] + ns = CUDA.fill(128, Tuple(ndim)) + p = 0.5 + A = rand_binomial(count = ns, prob = p) + @test size(A) == Tuple(ndim) + end + + @testset "count of dim $i, prob of dim $j" for i in eachindex(Adims), j in eachindex(Adims) + ndim = Adims[1:i] + pdim = Adims[1:j] + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + A = rand_binomial(count = ns, prob = ps) + if length(ndim) > length(pdim) + @test size(A) == Tuple(ndim) + else + @test size(A) == Tuple(pdim) + end + + # wrong size in the last dimension + for k in 1:i, l in 1:j + if i <= j + ndim[k] += 1 + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) + ndim[k] -= 1 + end + if i >= j + pdim[l] += 1 + ns = CUDA.fill(128, Tuple(ndim)) + ps = CUDA.rand(pdim...) + @test_throws DimensionMismatch rand_binomial(count = ns, prob = ps) + pdim[l] -= 1 + end + end + end + end + end + end # out-of-place @testset "Distributional tests" begin function mean_var_CI(m, S2, n, p, N, α) From f1e68f9ef4395574b2ca92b850557cfbab455a4d Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 21:25:08 +0100 Subject: [PATCH 10/13] Fix kernel --- src/kernels.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels.jl b/src/kernels.jl index b09510e..61b8533 100644 --- a/src/kernels.jl +++ b/src/kernels.jl @@ -269,9 +269,9 @@ function kernel_BTRS!( if i <= length(A) if invert - @inbounds A[i] = k - else @inbounds A[i] = n - k + else + @inbounds A[i] = k end end offset += window From 7a94e264dd9a301799fbfcf2c9add65117ca8f6a Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 21:25:27 +0100 Subject: [PATCH 11/13] Bump patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4e3f3d9..fe2cdbb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BinomialGPU" uuid = "c5bbfde1-2136-42cd-9b65-d5719df69ebf" authors = ["Simone Carlo Surace"] -version = "0.4.2" +version = "0.4.3" [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" From 5a923c8e9d7f7ca0c4bca67a90ff0d3fe007b610 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 21:26:44 +0100 Subject: [PATCH 12/13] Simplify test --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 970539c..8e5e3ef 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -190,7 +190,7 @@ using Test b = quantile(Normal(), 1-α/2) c = quantile(Chisq(N-1), α/2) d = quantile(Chisq(N-1), 1-α/2) - @test sqrt(truevar/N)*a <= m - truemean <= sqrt(truevar/N)*b + @test a <= sqrt(N/truevar)*(m - truemean) <= b @test c <= (N-1)*S2/truevar <= d end @testset "Scalar parameters" begin From 9aebcaef5aa56169b8676e3afece5502ca01ab07 Mon Sep 17 00:00:00 2001 From: simsurace Date: Mon, 14 Feb 2022 21:38:51 +0100 Subject: [PATCH 13/13] Put benchmarks at the end --- test/runtests.jl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 8e5e3ef..cf9ec21 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -94,22 +94,6 @@ using Test @test rand_binomial!(A, count = 2, prob = 1.5) == CUDA.fill(2, 256) # probabilities greater than 1 are equivalent to 1 @test_throws MethodError rand_binomial!(A, count = 5., prob = 0.5) # non-integer counts throw an error end - - @testset "benchmarks" begin - # benchmarks - A = CUDA.zeros(Int, 1024, 1024) - n = 128 - p = 0.5 - ns = CUDA.fill(128, (1024, 1024)) - ps = CUDA.rand(1024, 1024) - println("") - println("Benchmarking constant parameter array: should run in less than 2ms on an RTX20xx card") - display(@benchmark CUDA.@sync rand_binomial!($A, count = $n, prob = $p)) - println("") - println("Benchmarking full parameter array: should run in less than 2ms on an RTX20xx card") - display(@benchmark CUDA.@sync rand_binomial!($A, count = $ns, prob = $ps)) - println("") - end end # in-place @testset "out-of-place" begin @@ -216,4 +200,20 @@ using Test end end end # Distributional tests + + @testset "benchmarks" begin + # benchmarks + A = CUDA.zeros(Int, 1024, 1024) + n = 128 + p = 0.5 + ns = CUDA.fill(128, (1024, 1024)) + ps = CUDA.rand(1024, 1024) + println("") + println("Benchmarking constant parameter array: should run in less than 2ms on an RTX20xx card") + display(@benchmark CUDA.@sync rand_binomial!($A, count = $n, prob = $p)) + println("") + println("Benchmarking full parameter array: should run in less than 2ms on an RTX20xx card") + display(@benchmark CUDA.@sync rand_binomial!($A, count = $ns, prob = $ps)) + println("") + end end