Skip to content

Commit

Permalink
Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Sep 2, 2024
1 parent b54dad5 commit 9ba577b
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 20 deletions.
39 changes: 35 additions & 4 deletions ext/cuda/data_layouts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,38 @@ function CUDA.CuArray(fa::DL.FieldArray{FD}) where {FD}
return DL.FieldArray{FD}(arrays)
end

# Build a field array from a CUDA array by going through a host copy:
# `Array(array)` converts the input to an `Array`, the `DL.field_array` method
# for that array constructs the field array, and `CUDA.CuArray` converts the
# result back. Defined exactly once: a second, identical definition of this
# method would only overwrite the first.
DL.field_array(array::CUDA.CuArray, as::ArraySize) =
    CUDA.CuArray(DL.field_array(Array(array), as))


# TODO: this could be improved, but it's not typically used at runtime
#
# CUDA kernel: each thread copies a single element of `y` into the field
# array `x`.
#
# The global (1-based) thread index is converted into a Cartesian index over
# `size(y)` with `cart_ind`, and that same index is used to read `y` and to
# write `x`.
function copyto_field_array_knl!(x::DL.FieldArray{FD}, y) where {FD}
    gidx =
        CUDA.threadIdx().x + (CUDA.blockIdx().x - Int32(1)) * CUDA.blockDim().x
    # The launch uses `blocks = cld(n, threads)`, so `blocks * threads` can be
    # larger than the number of elements; surplus threads must not index.
    if gidx <= length(y)
        I = cart_ind(size(y), gidx)
        x[I] = y[I]
    end
    return nothing
end

# Copy the CUDA array `y` into the field array `x` and return `x`.
#
# Two layouts of `y` are handled, selected by comparing its dimensionality
# with that of the arrays stored in `x`:
#   * same number of dimensions: `y` is copied into every array of `x`;
#   * one extra dimension: a kernel (`copyto_field_array_knl!`) copies `y`
#     element by element through `x`'s own indexing.
#
# NOTE(review): any other dimensionality falls through and leaves `x`
# untouched without an error — confirm this is intended.
@inline function Base.copyto!(
    x::DL.FieldArray{FD, NT},
    y::CUDA.CuArray,
) where {FD, NT <: NTuple}
    if ndims(eltype(NT)) == ndims(y)
        @inbounds for i in 1:DL.tuple_length(NT)
            Base.copyto!(x.arrays[i], y)
        end
    elseif ndims(eltype(NT)) + 1 == ndims(y)
        n = length(y)
        # Nothing to copy; also avoids `cld(0, 0)` (a DivideError) below,
        # since `threads` would be `min(0, config.threads) == 0`.
        n == 0 && return x
        kernel =
            CUDA.@cuda always_inline = true launch = false copyto_field_array_knl!(
                x,
                y,
            )
        config = CUDA.launch_configuration(kernel.fun)
        threads = min(n, config.threads)
        blocks = cld(n, threads)
        kernel(x, y; threads, blocks)
    end
    return x
end
27 changes: 15 additions & 12 deletions ext/cuda/data_layouts_copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@ function knl_copyto_linear!(dest::AbstractData, bc, us)
return nothing
end

# Kernel body for copying a broadcast object into a `DataF` destination using
# linear indexing.
#
# `dest` holds a single value, which is written through the index-free
# `dest[]`. The broadcast object is read at the calling thread's index. The
# write is guarded so that threads whose index exceeds `get_N(us)` do nothing.
function knl_copyto_linear!(dest::DataF{S}, bc, us) where {S}
    @inbounds begin
        tidx = thread_index()
        if tidx <= get_N(us)
            dest[] = bc[tidx]
        end
    end
    return nothing
end

Expand All @@ -48,13 +52,17 @@ function knl_copyto_flat!(dest::AbstractData, bc, us)
return nothing
end

# Kernel body for copying a broadcast object into a `DataF` destination when
# the operands do not share a uniform data layout.
#
# The method is restricted to broadcasts of type
# `DataLayouts.BroadcastedUnionDataF{S}`, which are read with the index-free
# `bc[]` and written to `dest[]`; no per-thread Cartesian index is computed.
# The write is guarded so that threads whose index exceeds `get_N(us)` do
# nothing.
function knl_copyto_flat!(
    dest::DataF{S},
    bc::DataLayouts.BroadcastedUnionDataF{S},
    us,
) where {S}
    @inbounds begin
        tidx = thread_index()
        if tidx <= get_N(us)
            dest[] = bc[]
        end
    end
    return nothing
end
Expand All @@ -67,12 +75,7 @@ function cuda_copyto!(dest::AbstractData, bc)
if Nv > 0 && Nh > 0
if has_uniform_datalayouts(bc)
bc′ = to_non_extruded_broadcasted(bc)
auto_launch!(
knl_copyto_linear!,
(dest, bc′, us),
n;
auto = true,
)
auto_launch!(knl_copyto_linear!, (dest, bc′, us), n; auto = true)
else
auto_launch!(knl_copyto_flat!, (dest, bc, us), n; auto = true)
end
Expand Down
14 changes: 13 additions & 1 deletion src/DataLayouts/DataLayouts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,10 @@ end
@inbounds col[]
end

# Integer indexing into zero-dimensional data: the index `I` is ignored and
# the call forwards to the index-free `col[]`.
@propagate_inbounds Base.getindex(col::Data0D, I::Integer) = @inbounds col[]

Base.@propagate_inbounds function Base.setindex!(data::DataF{S}, val) where {S}
@inbounds set_struct!(
field_array(data),
Expand All @@ -581,6 +585,14 @@ end
@inbounds col[] = val
end

# Integer-indexed assignment into zero-dimensional data: the index `I` is
# ignored and `val` is stored through the index-free `col[] = val`.
@propagate_inbounds function Base.setindex!(col::Data0D, val, I::Integer)
    @inbounds begin
        col[] = val
    end
end

# ======================
# DataSlab2D DataLayout
# ======================
Expand Down Expand Up @@ -1306,7 +1318,7 @@ type parameters.
#! format: on

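# DataF is included here as well: integer-indexed getindex/setindex! methods
# are generated for it, so `data[1]` on a DataF no longer raises a MethodError.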
for DL in (:IJKFVH, :IJFH, :IFH, :IJF, :IF, :VF, :VIJFH, :VIFH)
for DL in (:IJKFVH, :IJFH, :IFH, :IJF, :IF, :DataF, :VF, :VIJFH, :VIFH)
@eval @propagate_inbounds Base.getindex(data::$(DL), I::Integer) =
linear_getindex(data, I)
@eval @propagate_inbounds Base.setindex!(data::$(DL), val, I::Integer) =
Expand Down
2 changes: 0 additions & 2 deletions src/DataLayouts/copyto.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ end

# broadcasting scalar assignment
# Performance optimization for the common identity scalar case: dest .= val
# And this is valid for the CPU or GPU, since the broadcasted object
# is a scalar type.
function Base.copyto!(
dest::AbstractData,
bc::Base.Broadcast.Broadcasted{Style},
Expand Down
3 changes: 3 additions & 0 deletions src/DataLayouts/field_array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,9 @@ function field_array(
FieldArray{FD}(EmptyArray(scalar_array))
end

# Print a `FieldArray` as `FieldArray{FD}(<contents>)`, where the contents are
# the result of `Array(fa)`.
# (An alternative that prints `arrays_type(typeof(fa))` in place of the
# `FieldArray{FD}` prefix was left disabled.)
function Base.show(io::IO, fa::FieldArray{FD}) where {FD}
    print(io, "FieldArray{$FD}(", Array(fa), ")")
end

# Warning: this method is type-unstable.
function Base.view(fa::FieldArray{FD}, inds...) where {FD}
AI = dropat(inds, Val(FD))
Expand Down
2 changes: 1 addition & 1 deletion test/DataLayouts/data0d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ end
array = zeros(Float64, 3)
data = DataF{S}(array)
@test data[][2] == zero(Float64)
@test_throws MethodError data[1]
# @test_throws MethodError data[1] # this no longer can throw an error
end

@testset "DataF type safety" begin
Expand Down

0 comments on commit 9ba577b

Please sign in to comment.