From 724571e9b2abf878145c0d1b9f91b7a2d3be6fbd Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Sat, 1 Jun 2024 10:45:00 -0400 Subject: [PATCH] Remove some explicit CUDA dependence in tests --- .../matrix_fields_multiple_field_solve.jl | 5 ++-- src/ClimaCore.jl | 1 + src/MatrixFields/MatrixFields.jl | 9 +++--- src/MatrixFields/field2arrays.jl | 17 ++++------- src/interface.jl | 3 ++ test/DataLayouts/cuda.jl | 28 ++++++++++++------- test/Fields/field.jl | 27 ++++++++++-------- test/Fields/field_multi_broadcast_fusion.jl | 7 ++--- test/Fields/reduction_cuda.jl | 2 +- test/Fields/reduction_cuda_distributed.jl | 1 - test/Operators/finitedifference/column.jl | 5 ++-- .../finitedifference/opt_examples.jl | 10 +++---- test/Operators/hybrid/extruded_3dbox_cuda.jl | 7 ++--- test/Operators/hybrid/extruded_sphere_cuda.jl | 7 +++-- 14 files changed, 65 insertions(+), 64 deletions(-) diff --git a/ext/cuda/matrix_fields_multiple_field_solve.jl b/ext/cuda/matrix_fields_multiple_field_solve.jl index 64590a0c65..3b1d2d9ec2 100644 --- a/ext/cuda/matrix_fields_multiple_field_solve.jl +++ b/ext/cuda/matrix_fields_multiple_field_solve.jl @@ -6,11 +6,10 @@ import ClimaCore.MatrixFields import ClimaCore.MatrixFields: _single_field_solve! import ClimaCore.MatrixFields: multiple_field_solve! import ClimaCore.MatrixFields: is_CuArray_type -import ClimaCore.MatrixFields: allow_scalar_func +import ClimaCore: allow_scalar import ClimaCore.Utilities.UnrolledFunctions: unrolled_map -allow_scalar_func(::ClimaComms.CUDADevice, f, args) = - CUDA.@allowscalar f(args...) +allow_scalar(f, ::ClimaComms.CUDADevice, args...) = CUDA.@allowscalar f(args...) 
is_CuArray_type(::Type{T}) where {T <: CUDA.CuArray} = true diff --git a/src/ClimaCore.jl b/src/ClimaCore.jl index db809ec597..b9b8b7e0b6 100644 --- a/src/ClimaCore.jl +++ b/src/ClimaCore.jl @@ -2,6 +2,7 @@ module ClimaCore using PkgVersion const VERSION = PkgVersion.@Version +import ClimaComms include("interface.jl") include("devices.jl") diff --git a/src/MatrixFields/MatrixFields.jl b/src/MatrixFields/MatrixFields.jl index b437af042c..85b62876a9 100644 --- a/src/MatrixFields/MatrixFields.jl +++ b/src/MatrixFields/MatrixFields.jl @@ -63,6 +63,7 @@ import ..Spaces import ..Spaces: local_geometry_type import ..Fields import ..Operators +import ..allow_scalar using ..Utilities.UnrolledFunctions @@ -117,11 +118,9 @@ function Base.show(io::IO, field::ColumnwiseBandMatrixField) end column_field = Fields.column(field, 1, 1, 1) io = IOContext(io, :compact => true, :limit => true) - allow_scalar_func( - ClimaComms.device(field), - Base.print_array, - (io, column_field2array_view(column_field)), - ) + allow_scalar(ClimaComms.device(field)) do + Base.print_array(io, column_field2array_view(column_field)) + end else # When a BandedMatrix with non-number entries is printed, it currently # either prints in an illegible format (e.g., if it has AxisTensor or diff --git a/src/MatrixFields/field2arrays.jl b/src/MatrixFields/field2arrays.jl index 26b599f3d4..31015ef7b6 100644 --- a/src/MatrixFields/field2arrays.jl +++ b/src/MatrixFields/field2arrays.jl @@ -53,23 +53,18 @@ function column_field2array(field::Fields.FiniteDifferenceField) last_row = matrix_d < n_cols - n_rows ?
n_rows : n_cols - matrix_d diagonal_data_view = view(diagonal_data, first_row:last_row) - allow_scalar_func( - ClimaComms.device(field), - copyto!, - (matrix_diagonal, diagonal_data_view), - ) + allow_scalar(ClimaComms.device(field)) do + copyto!(matrix_diagonal, diagonal_data_view) + end end return matrix else # field represents a vector - return allow_scalar_func( - ClimaComms.device(field), - Array, - (column_field2array_view(field),), - ) + return allow_scalar(ClimaComms.device(field)) do + Array(column_field2array_view(field)) + end end end -allow_scalar_func(::ClimaComms.AbstractDevice, f, args) = f(args...) """ column_field2array_view(field) diff --git a/src/interface.jl b/src/interface.jl index ceb5217eda..4c7b14a7cf 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -41,3 +41,6 @@ Base.@propagate_inbounds column_args(args::Tuple{Any}, inds...) = Base.@propagate_inbounds column_args(args::Tuple{}, inds...) = () function level end + +# TODO: move to ClimaComms +allow_scalar(f, ::ClimaComms.AbstractDevice, args...) = f(args...)
diff --git a/test/DataLayouts/cuda.jl b/test/DataLayouts/cuda.jl index 685048d684..8aa2996920 100644 --- a/test/DataLayouts/cuda.jl +++ b/test/DataLayouts/cuda.jl @@ -3,7 +3,9 @@ julia -g2 --check-bounds=yes --project=test using Revise; include(joinpath("test", "DataLayouts", "cuda.jl")) =# using Test +using ClimaComms using CUDA +ClimaComms.@import_required_backends using ClimaCore.DataLayouts function knl_copy!(dst, src) @@ -20,14 +22,16 @@ function knl_copy!(dst, src) end function test_copy!(dst, src) - @cuda threads = (4, 4) blocks = (10,) knl_copy!(dst, src) + CUDA.@cuda threads = (4, 4) blocks = (10,) knl_copy!(dst, src) end @testset "data in GPU kernels" begin S = Tuple{Complex{Float64}, Float64} - src = IJFH{S, 4}(CuArray(rand(4, 4, 3, 10))) - dst = IJFH{S, 4}(CuArray(zeros(4, 4, 3, 10))) + device = ClimaComms.device() + ArrayType = ClimaComms.array_type(device) + src = IJFH{S, 4}(ArrayType(rand(4, 4, 3, 10))) + dst = IJFH{S, 4}(ArrayType(zeros(4, 4, 3, 10))) test_copy!(dst, src) @@ -38,8 +42,10 @@ end FT = Float64 S1 = NamedTuple{(:a, :b), Tuple{Complex{Float64}, Float64}} S2 = Float64 - data_arr1 = CuArray(ones(FT, 2, 2, 3, 2)) - data_arr2 = CuArray(ones(FT, 2, 2, 1, 2)) + device = ClimaComms.device() + ArrayType = ClimaComms.array_type(device) + data_arr1 = ArrayType(ones(FT, 2, 2, 3, 2)) + data_arr2 = ArrayType(ones(FT, 2, 2, 1, 2)) data1 = IJFH{S1, 2}(data_arr1) data2 = IJFH{S2, 2}(data_arr2) @@ -49,8 +55,8 @@ end @test Array(parent(res)) == FT[2 for i in 1:2, j in 1:2, f in 1:1, h in 1:2] Nv = 33 - data_arr1 = CuArray(ones(FT, Nv, 4, 4, 3, 2)) - data_arr2 = CuArray(ones(FT, Nv, 4, 4, 1, 2)) + data_arr1 = ArrayType(ones(FT, Nv, 4, 4, 3, 2)) + data_arr2 = ArrayType(ones(FT, Nv, 4, 4, 1, 2)) data1 = VIJFH{S1, Nv, 4}(data_arr1) data2 = VIJFH{S2, Nv, 4}(data_arr2) @@ -65,19 +71,21 @@ end @testset "broadcasting assignment from scalar" begin FT = Float64 S = Complex{FT} - data = IJFH{S, 2}(CuArray{FT}, 3) + device = ClimaComms.device() + ArrayType = 
ClimaComms.array_type(device) + data = IJFH{S, 2}(ArrayType{FT}, 3) data .= Complex(1.0, 2.0) @test Array(parent(data)) == FT[f == 1 ? 1 : 2 for i in 1:2, j in 1:2, f in 1:2, h in 1:3] Nv = 33 - data = VIJFH{S, Nv, 4}(CuArray{FT}(undef, Nv, 4, 4, 2, 3)) + data = VIJFH{S, Nv, 4}(ArrayType{FT}(undef, Nv, 4, 4, 2, 3)) data .= Complex(1.0, 2.0) @test Array(parent(data)) == FT[ f == 1 ? 1 : 2 for v in 1:Nv, i in 1:4, j in 1:4, f in 1:2, h in 1:3 ] - data = DataF{S}(CuArray{FT}) + data = DataF{S}(ArrayType{FT}) data .= Complex(1.0, 2.0) @test Array(parent(data)) == FT[f == 1 ? 1 : 2 for f in 1:2] end diff --git a/test/Fields/field.jl b/test/Fields/field.jl index b91b583c6d..cb81b68977 100644 --- a/test/Fields/field.jl +++ b/test/Fields/field.jl @@ -7,6 +7,7 @@ using Test using JET using ClimaComms +ClimaComms.@import_required_backends using OrderedCollections using StaticArrays, IntervalSets import ClimaCore @@ -26,8 +27,6 @@ import ClimaCore: using LinearAlgebra: norm using Statistics: mean using ForwardDiff -using CUDA -using CUDA: @allowscalar include( joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"), @@ -346,8 +345,10 @@ end @test !Fields.is_diagonal_bc(bc2) end -function call_getcolumn(fv, colidx) - @allowscalar fvcol = fv[colidx] +function call_getcolumn(fv, colidx, device) + ClimaCore.allow_scalar(device) do + fvcol = fv[colidx] + end nothing end function call_getproperty(fv) @@ -361,20 +362,22 @@ end f = fill((x = Float32(1), y = Float32(2)), fspace) fv = Fields.FieldVector(; c, f) colidx = Fields.ColumnIndex((1, 1), 1) # arbitrary index + device = ClimaComms.device() - @allowscalar @test all(parent(fv.c.a[colidx]) .== Float32(1)) - @allowscalar @test all(parent(fv.f.y[colidx]) .== Float32(2)) - @allowscalar @test propertynames(fv) == propertynames(fv[colidx]) + ClimaCore.allow_scalar(device) do + @test all(parent(fv.c.a[colidx]) .== Float32(1)) + @test all(parent(fv.f.y[colidx]) .== Float32(2)) + @test propertynames(fv) == 
propertynames(fv[colidx]) + end # JET tests # prerequisite call_getproperty(fv) # compile first @test_opt call_getproperty(fv) - call_getcolumn(fv, colidx) # compile first - @test_opt call_getcolumn(fv, colidx) - p = @allocated call_getcolumn(fv, colidx) - device = ClimaComms.device() + call_getcolumn(fv, colidx, device) # compile first + @test_opt call_getcolumn(fv, colidx, device) + p = @allocated call_getcolumn(fv, colidx, device) if ClimaComms.SingletonCommsContext(device) isa ClimaComms.AbstractCPUDevice @test p ≤ 32 end @@ -822,7 +825,7 @@ convergence_rate(err, Δh) = zcf = Fields.coordinate_field(Y.y).z Δz = Fields.Δz_field(axes(zcf)) Δz_col = Δz[Fields.ColumnIndex((1, 1), 1)] - Δz_1 = CUDA.allowscalar() do + Δz_1 = ClimaCore.allow_scalar(device) do parent(Δz_col)[1] end key = zelem diff --git a/test/Fields/field_multi_broadcast_fusion.jl b/test/Fields/field_multi_broadcast_fusion.jl index a08689e8b2..5d6b128720 100644 --- a/test/Fields/field_multi_broadcast_fusion.jl +++ b/test/Fields/field_multi_broadcast_fusion.jl @@ -9,6 +9,7 @@ using JET using BenchmarkTools using ClimaComms +ClimaComms.@import_required_backends using OrderedCollections using StaticArrays, IntervalSets import ClimaCore @@ -30,8 +31,6 @@ import ClimaCore.Fields: @fused_direct using LinearAlgebra: norm using Statistics: mean using ForwardDiff -using CUDA -using CUDA: @allowscalar util_file = joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl") @@ -319,7 +318,6 @@ end @testset "FusedMultiBroadcast IJFH" begin FT = Float64 device = ClimaComms.device() - ArrayType = device isa ClimaComms.CUDADevice ? CuArray : Array sem_space = TU.SphereSpectralElementSpace(FT; context = ClimaComms.context(device)) IJFH_data() = Fields.Field(FT, sem_space) @@ -342,7 +340,6 @@ end @testset "FusedMultiBroadcast VF" begin FT = Float64 device = ClimaComms.device() - ArrayType = device isa ClimaComms.CUDADevice ? 
CuArray : Array colspace = TU.ColumnCenterFiniteDifferenceSpace( FT; zelem = 3, @@ -361,7 +358,7 @@ end @testset "FusedMultiBroadcast DataF" begin FT = Float64 device = ClimaComms.device() - ArrayType = device isa ClimaComms.CUDADevice ? CuArray : Array + ArrayType = ClimaComms.array_type(device) DataF_data() = DataF{FT}(ArrayType(ones(FT, 2))) X = Fields.FieldVector(; x1 = DataF_data(), diff --git a/test/Fields/reduction_cuda.jl b/test/Fields/reduction_cuda.jl index ab5db17a16..7e2f808ef9 100644 --- a/test/Fields/reduction_cuda.jl +++ b/test/Fields/reduction_cuda.jl @@ -1,6 +1,6 @@ using Test -using CUDA using ClimaComms +ClimaComms.@import_required_backends using Statistics using LinearAlgebra diff --git a/test/Fields/reduction_cuda_distributed.jl b/test/Fields/reduction_cuda_distributed.jl index 985a3ba5cf..091669ce3c 100644 --- a/test/Fields/reduction_cuda_distributed.jl +++ b/test/Fields/reduction_cuda_distributed.jl @@ -1,5 +1,4 @@ using Test -using CUDA using ClimaComms ClimaComms.@import_required_backends using Statistics diff --git a/test/Operators/finitedifference/column.jl b/test/Operators/finitedifference/column.jl index 325a294455..2bb88d42b3 100644 --- a/test/Operators/finitedifference/column.jl +++ b/test/Operators/finitedifference/column.jl @@ -1,11 +1,10 @@ using Test using StaticArrays, IntervalSets, LinearAlgebra -import ClimaComms +using ClimaComms +ClimaComms.@import_required_backends import ClimaCore: slab, Domains, Meshes, Topologies, Spaces, Fields, Operators import ClimaCore.Domains: Geometry -import CUDA -CUDA.allowscalar(false) device = ClimaComms.device() diff --git a/test/Operators/finitedifference/opt_examples.jl b/test/Operators/finitedifference/opt_examples.jl index 41389c874f..8e374a93d3 100644 --- a/test/Operators/finitedifference/opt_examples.jl +++ b/test/Operators/finitedifference/opt_examples.jl @@ -1,4 +1,6 @@ -import ClimaCore, ClimaComms, CUDA +import ClimaCore +using ClimaComms +ClimaComms.@import_required_backends using 
BenchmarkTools @isdefined(TU) || include( joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"), @@ -577,10 +579,6 @@ end p_allocated = @allocated set_ᶠuₕ³!(ᶜx, ᶠx) @show p_allocated - trial = if device isa ClimaComms.CUDADevice - @benchmark CUDA.@sync set_ᶠuₕ³!($ ᶜx, $ᶠx) - else - @benchmark set_ᶠuₕ³!($ ᶜx, $ᶠx) - end + trial = @benchmark ClimaComms.@cuda_sync $device set_ᶠuₕ³!($ ᶜx, $ᶠx) show(stdout, MIME("text/plain"), trial) end diff --git a/test/Operators/hybrid/extruded_3dbox_cuda.jl b/test/Operators/hybrid/extruded_3dbox_cuda.jl index 07d0941d6f..7983f53962 100644 --- a/test/Operators/hybrid/extruded_3dbox_cuda.jl +++ b/test/Operators/hybrid/extruded_3dbox_cuda.jl @@ -3,8 +3,9 @@ julia --project using Revise; include(joinpath("test", "Spaces", "extruded_3dbox_cuda.jl")) =# using LinearAlgebra, IntervalSets -using CUDA -using ClimaComms, ClimaCore +using ClimaComms +ClimaComms.@import_required_backends +using ClimaCore import ClimaCore: Domains, Topologies, @@ -158,8 +159,6 @@ end cos.(coords_gpu.z), ) - CUDA.allowscalar(false) - # Test weak grad operator wgrad = Operators.WeakGradient() @test parent(wgrad.(f_cpu)) ≈ Array(parent(wgrad.(f_gpu))) diff --git a/test/Operators/hybrid/extruded_sphere_cuda.jl b/test/Operators/hybrid/extruded_sphere_cuda.jl index d98cbc2bfa..1b62b207d0 100644 --- a/test/Operators/hybrid/extruded_sphere_cuda.jl +++ b/test/Operators/hybrid/extruded_sphere_cuda.jl @@ -3,8 +3,9 @@ julia --project using Revise; include(joinpath("test", "Spaces", "extruded_sphere_cuda.jl")) =# using LinearAlgebra, IntervalSets -using CUDA -using ClimaComms, ClimaCore +using ClimaComms +ClimaComms.@import_required_backends +using ClimaCore import ClimaCore: Domains, Topologies, @@ -74,7 +75,7 @@ end 2 .* cos.(coords_cpu.long .+ coords_cpu.lat), ) x_gpu = Geometry.UVWVector.(cosd.(coords_gpu.lat), 0.0, 0.0) - CUDA.allowscalar(false) + f_gpu = sin.(coords_gpu.lat .+ 2 .* coords_gpu.long) g_gpu = Geometry.UVVector.(