diff --git a/.gitignore b/.gitignore index 4849aa36aa..4a832065a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ Manifest.toml LocalPreferences.toml +# Files related to sbatch script output +*.out +CI_OUTPUT_DIR* + # Files generated by invoking Julia with --code-coverage *.jl.cov *.jl.*.cov diff --git a/lib/ClimaCoreTempestRemap/Project.toml b/lib/ClimaCoreTempestRemap/Project.toml index ec6535be60..54cb11cec3 100644 --- a/lib/ClimaCoreTempestRemap/Project.toml +++ b/lib/ClimaCoreTempestRemap/Project.toml @@ -7,6 +7,7 @@ version = "0.3.7" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" @@ -24,3 +25,4 @@ julia = "1.7" [extras] CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" diff --git a/lib/ClimaCoreTempestRemap/script.sh b/lib/ClimaCoreTempestRemap/script.sh new file mode 100644 index 0000000000..713abea86f --- /dev/null +++ b/lib/ClimaCoreTempestRemap/script.sh @@ -0,0 +1,12 @@ +#!/bin/bash +module purge +module load julia/1.8.1 openmpi/4.1.1 hdf5/1.12.1-ompi411 + +export CLIMACORE_DISTRIBUTED="MPI" +export JULIA_HDF5_PATH="" +export GKSwstype=100 # if plotting + +julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.build()' +julia --project -e 'using Pkg; Pkg.build("MPI"); Pkg.build("HDF5")' +julia --project -e 'using MPIPreferences; MPIPreferences.use_system_binary()' +julia --project -e 'include("test/mpi_tests/run_mpi.jl")' diff --git a/lib/ClimaCoreTempestRemap/test/mpi_tests/distr_regrid_example.jl b/lib/ClimaCoreTempestRemap/test/mpi_tests/distr_regrid_example.jl index 1bbf4f0bb2..0c371881b4 100644 --- a/lib/ClimaCoreTempestRemap/test/mpi_tests/distr_regrid_example.jl +++ 
b/lib/ClimaCoreTempestRemap/test/mpi_tests/distr_regrid_example.jl @@ -6,6 +6,7 @@ using ClimaComms using MPI using ClimaCore: Geometry, Meshes, Domains, Topologies, Spaces, Fields, Operators +using IntervalSets using ClimaCore.Spaces: Quadratures using SparseArrays @@ -33,6 +34,8 @@ comm = comms_ctx.mpicomm rank = MPI.Comm_rank(comm) root_pid = 0 +comms_ctx_serial = ClimaComms.SingletonCommsContext() + # construct domain domain = Domains.RectangleDomain( Geometry.XPoint(-1.0) .. Geometry.XPoint(1.0), @@ -42,26 +45,38 @@ domain = Domains.RectangleDomain( ) # construct distributed source space -source_nq = 2 +source_nq = 3 source_nex = 1 source_ney = 2 source_space = make_space(domain, source_nq, source_nex, source_ney, comms_ctx) +source_space_serial = make_space(domain, source_nq, source_nex, source_ney, comms_ctx_serial) # construct distributed target space -target_nq = 2 +target_nq = 3 target_nex = 1 target_ney = 3 target_space = make_space(domain, target_nq, target_nex, target_ney, comms_ctx) +target_space_serial = make_space(domain, target_nq, target_nex, target_ney, comms_ctx_serial) # generate weights (no remapping in x direction, so we really only need y_weights) +# TODO if ClimaComms.iamroot(comms_ctx) - x_weights = Operators.x_overlap(target_space, source_space) - y_weights = Operators.y_overlap(target_space, source_space) - weights = kron(x_weights, y_weights) + # x_weights = Operators.x_overlap(target_space, source_space) # TODO doesn't work distributedly + # y_weights = Operators.y_overlap(target_space, source_space) + # weights = kron(x_weights, y_weights) + weights = Operators.overlap(target_space, source_space) + arr = [1, 2, 3] else - weights = nothing + weights = nothing # placeholder only: bcast ignores the non-root value, so no SparseMatrixCSC pre-sizing is needed + arr = nothing end -MPI.bcast(weights, root_pid, comm) +weights = ClimaComms.bcast(comms_ctx, weights) # bcast returns the root's object; it does not mutate its argument +arr = ClimaComms.bcast(comms_ctx, arr) # same: keep the returned value so non-root ranks see [1, 2, 3] +ClimaComms.barrier(comms_ctx) 
+# MPI.Bcast!(weights, root_pid, comm) +# MPI.Barrier(comm) +@show arr +@show weights # TODO reorder weights produced by kronecker product - or manually create weights from x_weights, y_weights @@ -136,6 +151,10 @@ Nf = size(source_array, 3) # Nf is number of fields being remapped # loop over rows in weight matrix, compute dot prod for each row # TODO convert these to sparse representations later on send_row_sums = FT.(zeros(weights.m)) + +@show target_ind_to_pid +@show weights.m + local_rows = findall(j -> target_ind_to_pid[j] == pid, collect(1:(weights.m))) n_local_rows = length(local_rows) local_row_sums = FT.(zeros(n_local_rows)) diff --git a/lib/ClimaCoreTempestRemap/test/mpi_tests/run_mpi.jl b/lib/ClimaCoreTempestRemap/test/mpi_tests/run_mpi.jl new file mode 100644 index 0000000000..af9cb90140 --- /dev/null +++ b/lib/ClimaCoreTempestRemap/test/mpi_tests/run_mpi.jl @@ -0,0 +1,15 @@ +using MPI + +function runmpi(file; ntasks = 1) + MPI.mpiexec() do cmd + Base.run( + `$cmd -n $ntasks $(Base.julia_cmd()) --startup-file=no --project=$(Base.active_project()) $file`; + wait = true, + ) + true + end +end + +if !Sys.iswindows() + runmpi(joinpath(@__DIR__, "distr_regrid_example.jl"), ntasks = 2) +end diff --git a/src/Operators/remapping.jl b/src/Operators/remapping.jl index fe9cedc1e2..4f9c1dfa04 100644 --- a/src/Operators/remapping.jl +++ b/src/Operators/remapping.jl @@ -82,9 +82,9 @@ function overlap( source::S, ) where { T <: - SpectralElementSpace2D{<:Topology2D{<:ClimaComms.SingletonCommsContext}}, + SpectralElementSpace2D{<:Topology2D{<:Union{ClimaComms.SingletonCommsContext, ClimaComms.MPICommsContext}}}, S <: - SpectralElementSpace2D{<:Topology2D{<:ClimaComms.SingletonCommsContext}}, + SpectralElementSpace2D{<:Topology2D{<:Union{ClimaComms.SingletonCommsContext, ClimaComms.MPICommsContext}}}, } @assert ( typeof(Spaces.topology(target).mesh) <: Meshes.RectilinearMesh &&