Skip to content

Commit

Permalink
Change FH -> HF
Browse files Browse the repository at this point in the history
Swap F and H inds
  • Loading branch information
charleskawczynski committed Oct 20, 2024
1 parent 0635ff3 commit 81a2668
Show file tree
Hide file tree
Showing 74 changed files with 766 additions and 752 deletions.
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ PR [#2033](https://github.com/CliMA/ClimaCore.jl/pull/2033) introduces new
constructors for `DataLayout`s. Instead of writing
```julia
array = rand(FT, Nv, Nij, Nij, 3, Nh)
data = VIJFH{S, Nv, Nij}(array)
data = VIJHF{S, Nv, Nij}(array)
```

You can now write
```julia
data = VIJFH{S}(ArrayType{FT}, rand; Nv, Nij, Nh)
data = VIJHF{S}(ArrayType{FT}, rand; Nv, Nij, Nh)
```
and grab the `array` with `parent(data)` (if you need).

Expand Down
8 changes: 4 additions & 4 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ DataLayouts.DataF
DataLayouts.IF
DataLayouts.IJF
DataLayouts.VF
DataLayouts.IFH
DataLayouts.IJFH
DataLayouts.VIFH
DataLayouts.VIJFH
DataLayouts.IHF
DataLayouts.IJHF
DataLayouts.VIHF
DataLayouts.VIJHF
```

## Geometry
Expand Down
2 changes: 1 addition & 1 deletion examples/hybrid/sphere/solid_body_rotation_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ discrete_hydrostatic_balance!(ρ, p, z_top / n_vert, grav)
# set up initial condition: not discretely balanced; only create a Field as a place holder
Yc = map(coord -> init_sbr_thermo(coord.z), c_coords)
# put the discretely balanced ρ and ρe into Yc
parent(Yc.ρ) .= ρ # Yc.ρ is a VIJFH layout
parent(Yc.ρ) .= ρ # Yc.ρ is a VIJHF layout
parent(Yc.ρe) .= ρe

# initialize velocity: at rest
Expand Down
4 changes: 2 additions & 2 deletions ext/cuda/data_layouts.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

import ClimaCore.DataLayouts: AbstractData
import ClimaCore.DataLayouts: FusedMultiBroadcast
import ClimaCore.DataLayouts: IJKFVH, IJFH, VIJFH, VIFH, IFH, IJF, IF, VF, DataF
import ClimaCore.DataLayouts: IJFHStyle, VIJFHStyle, VFStyle, DataFStyle
import ClimaCore.DataLayouts: IJKFVH, IJHF, VIJHF, VIHF, IHF, IJF, IF, VF, DataF
import ClimaCore.DataLayouts: IJHFStyle, VIJHFStyle, VFStyle, DataFStyle
import ClimaCore.DataLayouts: promote_parent_array_type
import ClimaCore.DataLayouts: parent_array_type
import ClimaCore.DataLayouts: isascalar
Expand Down
20 changes: 10 additions & 10 deletions ext/cuda/data_layouts_mapreduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ end
function mapreduce_cuda(
f,
op,
data::Union{DataLayouts.VF, DataLayouts.IJFH, DataLayouts.VIJFH};
data::Union{DataLayouts.VF, DataLayouts.IJHF, DataLayouts.VIJHF};
weighted_jacobian = OnesArray(parent(data)),
opargs...,
)
Expand Down Expand Up @@ -83,7 +83,7 @@ function mapreduce_cuda_kernel!(
gidx = _get_gidx(tidx, bidx, effective_blksize)
reduction = CUDA.CuStaticSharedArray(T, shmemsize)
reduction[tidx] = 0
(Nv, Nij, Nf, Nh) = _get_dims(dataview)
(Nv, Nij, Nh, Nf) = _get_dims(dataview)
nitems = Nv * Nij * Nij * Nf * Nh

# load shmem
Expand Down Expand Up @@ -115,21 +115,21 @@ end
@inline _dataview(pdata::AbstractArray{FT, 2}, fidx) where {FT} =
view(pdata, :, fidx:fidx)

# for IJFH DataLayout
# for IJHF DataLayout
@inline function _get_dims(pdata::AbstractArray{FT, 4}) where {FT}
(Nij, _, Nf, Nh) = size(pdata)
return (1, Nij, Nf, Nh)
(Nij, _, Nh, Nf) = size(pdata)
return (1, Nij, Nh, Nf)
end
@inline _dataview(pdata::AbstractArray{FT, 4}, fidx) where {FT} =
view(pdata, :, :, fidx:fidx, :)
view(pdata, :, :, :, fidx:fidx)

# for VIJFH DataLayout
# for VIJHF DataLayout
@inline function _get_dims(pdata::AbstractArray{FT, 5}) where {FT}
(Nv, Nij, _, Nf, Nh) = size(pdata)
return (Nv, Nij, Nf, Nh)
(Nv, Nij, _, Nh, Nf) = size(pdata)
return (Nv, Nij, Nh, Nf)
end
@inline _dataview(pdata::AbstractArray{FT, 5}, fidx) where {FT} =
view(pdata, :, :, :, fidx:fidx, :)
view(pdata, :, :, :, :, fidx:fidx)

@inline function _cuda_reduce!(op, reduction, tidx, reduction_size, N)
if reduction_size > N
Expand Down
34 changes: 17 additions & 17 deletions ext/cuda/data_layouts_threadblock.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ criteria:
in the thread partition
- The order of the thread partition should
follow the fastest changing index in the
datalayout (e.g., VIJ in VIJFH)
datalayout (e.g., VIJ in VIJHF)
"""
function partition end

Expand All @@ -46,25 +46,25 @@ bounds to ensure that the result of
"""
function is_valid_index end

##### VIJFH
@inline function partition(data::DataLayouts.VIJFH, n_max_threads::Integer)
##### VIJHF
@inline function partition(data::DataLayouts.VIJHF, n_max_threads::Integer)
(Nij, _, _, Nv, Nh) = DataLayouts.universal_size(data)
Nv_thread = min(Int(fld(n_max_threads, Nij * Nij)), Nv)
Nv_blocks = cld(Nv, Nv_thread)
@assert prod((Nv_thread, Nij, Nij)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nv_thread, Nij, Nij))),$n_max_threads)"
return (; threads = (Nv_thread, Nij, Nij), blocks = (Nv_blocks, Nh))
end
@inline function universal_index(::DataLayouts.VIJFH)
@inline function universal_index(::DataLayouts.VIJHF)
(tv, i, j) = CUDA.threadIdx()
(bv, h) = CUDA.blockIdx()
v = tv + (bv - 1) * CUDA.blockDim().x
return CartesianIndex((i, j, 1, v, h))
end
@inline is_valid_index(::DataLayouts.VIJFH, I::CI5, us::UniversalSize) =
@inline is_valid_index(::DataLayouts.VIJHF, I::CI5, us::UniversalSize) =
1 ≤ I[4] ≤ DataLayouts.get_Nv(us)

##### IJFH
@inline function partition(data::DataLayouts.IJFH, n_max_threads::Integer)
##### IJHF
@inline function partition(data::DataLayouts.IJHF, n_max_threads::Integer)
(Nij, _, _, _, Nh) = DataLayouts.universal_size(data)
Nh_thread = min(
Int(fld(n_max_threads, Nij * Nij)),
Expand All @@ -75,30 +75,30 @@ end
@assert prod((Nij, Nij)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nij, Nij))),$n_max_threads)"
return (; threads = (Nij, Nij, Nh_thread), blocks = (Nh_blocks,))
end
@inline function universal_index(::DataLayouts.IJFH)
@inline function universal_index(::DataLayouts.IJHF)
(i, j, th) = CUDA.threadIdx()
(bh,) = CUDA.blockIdx()
h = th + (bh - 1) * CUDA.blockDim().z
return CartesianIndex((i, j, 1, 1, h))
end
@inline is_valid_index(::DataLayouts.IJFH, I::CI5, us::UniversalSize) =
@inline is_valid_index(::DataLayouts.IJHF, I::CI5, us::UniversalSize) =
1 ≤ I[5] ≤ DataLayouts.get_Nh(us)

##### IFH
@inline function partition(data::DataLayouts.IFH, n_max_threads::Integer)
##### IHF
@inline function partition(data::DataLayouts.IHF, n_max_threads::Integer)
(Ni, _, _, _, Nh) = DataLayouts.universal_size(data)
Nh_thread = min(Int(fld(n_max_threads, Ni)), Nh)
Nh_blocks = cld(Nh, Nh_thread)
@assert prod((Ni, Nh_thread)) ≤ n_max_threads "threads,n_max_threads=($(prod((Ni, Nh_thread))),$n_max_threads)"
return (; threads = (Ni, Nh_thread), blocks = (Nh_blocks,))
end
@inline function universal_index(::DataLayouts.IFH)
@inline function universal_index(::DataLayouts.IHF)
(i, th) = CUDA.threadIdx()
(bh,) = CUDA.blockIdx()
h = th + (bh - 1) * CUDA.blockDim().y
return CartesianIndex((i, 1, 1, 1, h))
end
@inline is_valid_index(::DataLayouts.IFH, I::CI5, us::UniversalSize) =
@inline is_valid_index(::DataLayouts.IHF, I::CI5, us::UniversalSize) =
1 ≤ I[5] ≤ DataLayouts.get_Nh(us)

##### IJF
Expand All @@ -125,21 +125,21 @@ end
end
@inline is_valid_index(::DataLayouts.IF, I::CI5, us::UniversalSize) = true

##### VIFH
@inline function partition(data::DataLayouts.VIFH, n_max_threads::Integer)
##### VIHF
@inline function partition(data::DataLayouts.VIHF, n_max_threads::Integer)
(Ni, _, _, Nv, Nh) = DataLayouts.universal_size(data)
Nv_thread = min(Int(fld(n_max_threads, Ni)), Nv)
Nv_blocks = cld(Nv, Nv_thread)
@assert prod((Nv_thread, Ni)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nv_thread, Ni))),$n_max_threads)"
return (; threads = (Nv_thread, Ni), blocks = (Nv_blocks, Nh))
end
@inline function universal_index(::DataLayouts.VIFH)
@inline function universal_index(::DataLayouts.VIHF)
(tv, i) = CUDA.threadIdx()
(bv, h) = CUDA.blockIdx()
v = tv + (bv - 1) * CUDA.blockDim().x
return CartesianIndex((i, 1, 1, v, h))
end
@inline is_valid_index(::DataLayouts.VIFH, I::CI5, us::UniversalSize) =
@inline is_valid_index(::DataLayouts.VIHF, I::CI5, us::UniversalSize) =
1 ≤ I[4] ≤ DataLayouts.get_Nv(us)

##### VF
Expand Down
50 changes: 25 additions & 25 deletions ext/cuda/topologies_dss.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ _configure_threadblock(nitems) =
function Topologies.dss_load_perimeter_data!(
::ClimaComms.CUDADevice,
dss_buffer::Topologies.DSSBuffer,
data::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
data::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
perimeter::Topologies.Perimeter2D,
)
(; perimeter_data) = dss_buffer
Expand All @@ -37,13 +37,13 @@ end

function dss_load_perimeter_data_kernel!(
perimeter_data::DataLayouts.AbstractData,
data::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
data::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
perimeter::Topologies.Perimeter2D{Nq},
) where {Nq}
gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x
(nperimeter, _, _, nlevels, nelems) = size(perimeter_data)
nfidx = DataLayouts.ncomponents(perimeter_data)
sizep = (nlevels, nperimeter, nfidx, nelems) # assume VIFH order
sizep = (nlevels, nperimeter, nfidx, nelems) # assume VIHF order
CI = CartesianIndex

if gidx ≤ prod(sizep)
Expand All @@ -57,7 +57,7 @@ end

function Topologies.dss_unload_perimeter_data!(
::ClimaComms.CUDADevice,
data::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
data::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
dss_buffer::Topologies.DSSBuffer,
perimeter,
)
Expand All @@ -76,14 +76,14 @@ function Topologies.dss_unload_perimeter_data!(
end

function dss_unload_perimeter_data_kernel!(
data::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
data::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
perimeter_data::AbstractData,
perimeter::Topologies.Perimeter2D{Nq},
) where {Nq}
gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x
(nperimeter, _, _, nlevels, nelems) = size(perimeter_data)
nfidx = DataLayouts.ncomponents(perimeter_data)
sizep = (nlevels, nperimeter, nfidx, nelems) # assume VIFH order
sizep = (nlevels, nperimeter, nfidx, nelems) # assume VIHF order
CI = CartesianIndex

if gidx ≤ prod(sizep)
Expand All @@ -97,7 +97,7 @@ end

function Topologies.dss_local!(
::ClimaComms.CUDADevice,
perimeter_data::DataLayouts.VIFH,
perimeter_data::DataLayouts.VIHF,
perimeter::Topologies.Perimeter2D,
topology::Topologies.Topology2D,
)
Expand Down Expand Up @@ -127,7 +127,7 @@ function Topologies.dss_local!(
end

function dss_local_kernel!(
perimeter_data::DataLayouts.VIFH,
perimeter_data::DataLayouts.VIHF,
local_vertices::AbstractVector{Tuple{Int, Int}},
local_vertex_offset::AbstractVector{Int},
interior_faces::AbstractVector{Tuple{Int, Int, Int, Int, Bool}},
Expand Down Expand Up @@ -189,11 +189,11 @@ end

function Topologies.dss_transform!(
device::ClimaComms.CUDADevice,
perimeter_data::DataLayouts.VIFH,
data::Union{DataLayouts.VIJFH, DataLayouts.IJFH},
perimeter_data::DataLayouts.VIHF,
data::Union{DataLayouts.VIJHF, DataLayouts.IJHF},
perimeter::Topologies.Perimeter2D,
local_geometry::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
weight::DataLayouts.IJFH,
local_geometry::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
weight::DataLayouts.IJHF,
localelems::AbstractVector{Int},
)
nlocalelems = length(localelems)
Expand Down Expand Up @@ -224,11 +224,11 @@ function Topologies.dss_transform!(
end

function dss_transform_kernel!(
perimeter_data::DataLayouts.VIFH,
data::Union{DataLayouts.VIJFH, DataLayouts.IJFH},
perimeter_data::DataLayouts.VIHF,
data::Union{DataLayouts.VIJHF, DataLayouts.IJHF},
perimeter::Topologies.Perimeter2D,
local_geometry::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
weight::DataLayouts.IJFH,
local_geometry::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
weight::DataLayouts.IJHF,
localelems::AbstractVector{Int},
::Val{nlocalelems},
) where {nlocalelems}
Expand All @@ -255,9 +255,9 @@ end

function Topologies.dss_untransform!(
device::ClimaComms.CUDADevice,
perimeter_data::DataLayouts.VIFH,
data::Union{DataLayouts.VIJFH, DataLayouts.IJFH},
local_geometry::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
perimeter_data::DataLayouts.VIHF,
data::Union{DataLayouts.VIJHF, DataLayouts.IJHF},
local_geometry::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
perimeter::Topologies.Perimeter2D,
localelems::AbstractVector{Int},
)
Expand Down Expand Up @@ -287,9 +287,9 @@ function Topologies.dss_untransform!(
end

function dss_untransform_kernel!(
perimeter_data::DataLayouts.VIFH,
data::Union{DataLayouts.VIJFH, DataLayouts.IJFH},
local_geometry::Union{DataLayouts.IJFH, DataLayouts.VIJFH},
perimeter_data::DataLayouts.VIHF,
data::Union{DataLayouts.VIJHF, DataLayouts.IJHF},
local_geometry::Union{DataLayouts.IJHF, DataLayouts.VIJHF},
perimeter::Topologies.Perimeter2D,
localelems::AbstractVector{Int},
::Val{nlocalelems},
Expand All @@ -316,7 +316,7 @@ end
# TODO: Function stubs, code to be implemented, needed only for distributed GPU runs
function Topologies.dss_local_ghost!(
::ClimaComms.CUDADevice,
perimeter_data::DataLayouts.VIFH,
perimeter_data::DataLayouts.VIHF,
perimeter::Topologies.Perimeter2D,
topology::Topologies.AbstractTopology,
)
Expand Down Expand Up @@ -344,7 +344,7 @@ function Topologies.dss_local_ghost!(
end

function dss_local_ghost_kernel!(
perimeter_data::DataLayouts.VIFH,
perimeter_data::DataLayouts.VIHF,
ghost_vertices,
ghost_vertex_offset,
perimeter::Topologies.Perimeter2D,
Expand Down Expand Up @@ -487,7 +487,7 @@ end

function Topologies.dss_ghost!(
::ClimaComms.CUDADevice,
perimeter_data::DataLayouts.VIFH,
perimeter_data::DataLayouts.VIHF,
perimeter::Topologies.Perimeter2D,
topology::Topologies.Topology2D,
)
Expand Down
6 changes: 3 additions & 3 deletions lib/ClimaCorePlots/src/ClimaCorePlots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ RecipesBase.@recipe function f(space::Spaces.ExtrudedFiniteDifferenceSpace)
data = Fields.field_values(coord_field)
Ni, Nj, _, Nv, Nh = size(data)

#TODO: assumes VIFH layout
#TODO: assumes VIHF layout
@assert Nj == 1 "plotting only defined for 1D extruded fields"

hspace = Spaces.horizontal_space(space)
Expand Down Expand Up @@ -431,7 +431,7 @@ function _unfolded_pannel_matrix(field, interpolate)
# TODO: inefficient memory wise, but good enough for now
panels = [fill(NaN, (panel_size * dof, panel_size * dof)) for _ in 1:6]

interpolated_data = DataLayouts.IJFH{FT, interpolate}(Array{FT}, nelem)
interpolated_data = DataLayouts.IJHF{FT, interpolate}(Array{FT}, nelem)
field_data = Fields.field_values(field)

Operators.tensor_product!(interpolated_data, field_data, Imat)
Expand All @@ -445,7 +445,7 @@ function _unfolded_pannel_matrix(field, interpolate)
x2_nodal_range = (dof * (ex2 - 1) + 1):(dof * ex2)
# transpose the data as our plotting axis order is
# reverse nodal element order (x1 axis varies fastest)
data_element = permutedims(parent(interpolated_data)[:, :, 1, lidx])
data_element = permutedims(parent(interpolated_data)[:, :, lidx, 1])
panel_data[x2_nodal_range, x1_nodal_range] = data_element
end

Expand Down
Loading

0 comments on commit 81a2668

Please sign in to comment.