Skip to content

Commit

Permalink
debugging weights bcast
Browse files Browse the repository at this point in the history
  • Loading branch information
juliasloan25 committed Jun 27, 2023
1 parent 60568fc commit 5c91ecc
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 9 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Manifest.toml
LocalPreferences.toml

# Files related to sbatch script output
*.out
CI_OUTPUT_DIR*

# Files generated by invoking Julia with --code-coverage
*.jl.cov
*.jl.*.cov
Expand Down
2 changes: 2 additions & 0 deletions lib/ClimaCoreTempestRemap/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.3.7"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
Expand All @@ -24,3 +25,4 @@ julia = "1.7"

[extras]
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
12 changes: 12 additions & 0 deletions lib/ClimaCoreTempestRemap/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Set up the module environment, build the Julia project, and run the MPI
# regridding test driver.
# NOTE(review): the relative `--project` and `test/...` paths suggest this is
# meant to be launched from lib/ClimaCoreTempestRemap — confirm.

# Start from a clean module environment, then load Julia, OpenMPI and HDF5.
module purge
module load julia/1.8.1 openmpi/4.1.1 hdf5/1.12.1-ompi411

export CLIMACORE_DISTRIBUTED="MPI"
export JULIA_HDF5_PATH=""
export GKSwstype=100 # if plotting

# Install and build the project's dependencies.
julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.build()'
# Build MPI and HDF5 explicitly (presumably so they pick up the modules loaded
# above — confirm).
julia --project -e 'using Pkg; Pkg.build("MPI"); Pkg.build("HDF5")'
# Configure MPIPreferences to use the system MPI binary.
julia --project -e 'using MPIPreferences; MPIPreferences.use_system_binary()'
# Run the MPI test driver.
julia --project -e 'include("test/mpi_tests/run_mpi.jl")'
33 changes: 26 additions & 7 deletions lib/ClimaCoreTempestRemap/test/mpi_tests/distr_regrid_example.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using ClimaComms
using MPI
using ClimaCore:
Geometry, Meshes, Domains, Topologies, Spaces, Fields, Operators
using IntervalSets

using ClimaCore.Spaces: Quadratures
using SparseArrays
Expand Down Expand Up @@ -33,6 +34,8 @@ comm = comms_ctx.mpicomm
rank = MPI.Comm_rank(comm)
root_pid = 0

comms_ctx_serial = ClimaComms.SingletonCommsContext()

# construct domain
domain = Domains.RectangleDomain(
Geometry.XPoint(-1.0) .. Geometry.XPoint(1.0),
Expand All @@ -42,26 +45,38 @@ domain = Domains.RectangleDomain(
)

# construct distributed source space
source_nq = 2
source_nq = 3
source_nex = 1
source_ney = 2
source_space = make_space(domain, source_nq, source_nex, source_ney, comms_ctx)
source_space_serial = make_space(domain, source_nq, source_nex, source_ney, comms_ctx_serial)

# construct distributed target space
target_nq = 2
target_nq = 3
target_nex = 1
target_ney = 3
target_space = make_space(domain, target_nq, target_nex, target_ney, comms_ctx)
target_space_serial = make_space(domain, target_nq, target_nex, target_ney, comms_ctx_serial)

# generate weights (no remapping in x direction, so we really only need y_weights)
# TODO
# Compute the remapping weights on the root process only. The per-direction
# approach (Operators.x_overlap / Operators.y_overlap combined with kron) is
# not used here because it does not yet work with distributed spaces (TODO).
if ClimaComms.iamroot(comms_ctx)
    weights = Operators.overlap(target_space, source_space)
    arr = [1, 2, 3] # small debugging payload to sanity-check the broadcast
else
    # Placeholders only: `bcast` sends the root's object to every process, so
    # non-root ranks need no preallocated buffer and these values are ignored.
    weights = nothing
    arr = nothing
end

# `bcast` returns the broadcast object rather than mutating its argument, so
# the result must be assigned; otherwise non-root ranks keep `nothing`.
weights = ClimaComms.bcast(comms_ctx, weights)
arr = ClimaComms.bcast(comms_ctx, arr)
ClimaComms.barrier(comms_ctx)
@show arr
@show weights

# TODO reorder weights produced by kronecker product - or manually create weights from x_weights, y_weights

Expand Down Expand Up @@ -136,6 +151,10 @@ Nf = size(source_array, 3) # Nf is number of fields being remapped
# Set up the per-row dot-product buffers, one entry per row of the weight matrix.
# TODO convert these to sparse representations later on
send_row_sums = zeros(FT, weights.m)

@show target_ind_to_pid
@show weights.m

# Row indices whose entry in `target_ind_to_pid` equals this process's `pid`.
local_rows = findall(row -> target_ind_to_pid[row] == pid, 1:(weights.m))
n_local_rows = length(local_rows)
local_row_sums = zeros(FT, n_local_rows)
Expand Down
15 changes: 15 additions & 0 deletions lib/ClimaCoreTempestRemap/test/mpi_tests/run_mpi.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
using MPI

"""
    runmpi(file; ntasks = 1)

Run the Julia script `file` under the MPI launcher with `ntasks` processes,
using the currently active project and no startup file. Blocks until the
launched command finishes and returns `true`.
"""
function runmpi(file; ntasks = 1)
    julia_exe = Base.julia_cmd()
    project = Base.active_project()
    MPI.mpiexec() do launcher
        mpi_cmd =
            `$launcher -n $ntasks $julia_exe --startup-file=no --project=$project $file`
        Base.run(mpi_cmd; wait = true)
        true
    end
end

# Launch the distributed regridding example on two MPI processes; skipped on Windows.
Sys.iswindows() || runmpi(joinpath(@__DIR__, "distr_regrid_example.jl"); ntasks = 2)
4 changes: 2 additions & 2 deletions src/Operators/remapping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ function overlap(
source::S,
) where {
T <:
SpectralElementSpace2D{<:Topology2D{<:ClimaComms.SingletonCommsContext}},
SpectralElementSpace2D{<:Topology2D{<:Union{ClimaComms.SingletonCommsContext, ClimaComms.MPICommsContext}}},
S <:
SpectralElementSpace2D{<:Topology2D{<:ClimaComms.SingletonCommsContext}},
SpectralElementSpace2D{<:Topology2D{<:Union{ClimaComms.SingletonCommsContext, ClimaComms.MPICommsContext}}},
}
@assert (
typeof(Spaces.topology(target).mesh) <: Meshes.RectilinearMesh &&
Expand Down

0 comments on commit 5c91ecc

Please sign in to comment.