Skip to content

Commit

Permalink
Use Devito's GPU magic block size in benchmark scripts and provide be…
Browse files Browse the repository at this point in the history
…nchmarking scripts.

Use Devito's GPU magic block size in benchmark scripts.
  • Loading branch information
PapyChacal authored Oct 24, 2023
2 parents 578a3f7 + 3f85cd2 commit 69685e1
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 4 deletions.
8 changes: 7 additions & 1 deletion fast/diffusion_2D_wBCs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,13 @@
configuration['mpi'] = mpiconf

if args.devito:
op = Operator([eq_stencil], name='DevitoOperator')

# To measure Devito at its best on GPU, we have to set the tile size manually
opt = None
if configuration['platform'].name == 'nvidiaX':
opt = ('advanced', {'par-tile': (32, 4, 8)})

op = Operator([eq_stencil], name='DevitoOperator', opt=opt)
op.apply(time=nt, dt=dt, a=nu)
print("Devito Field norm is:", norm(u))

Expand Down
8 changes: 7 additions & 1 deletion fast/diffusion_3D_wBCs.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,13 @@
if args.devito:
u.data[:, :, :, :] = 0
u.data[:, :, :, int(nz/2)] = 1
op = Operator([eq_stencil], name='DevitoOperator')

# To measure Devito at its best on GPU, we have to set the tile size manually
opt = None
if configuration['platform'].name == 'nvidiaX':
opt = ('advanced', {'par-tile': (32, 4, 8)})
op = Operator([eq_stencil], name='DevitoOperator', opt=opt)

# Apply the operator for a number of timesteps
op.apply(time=nt, dt=dt, a=nu)
print("Devito Field norm is:", norm(u))
Expand Down
36 changes: 36 additions & 0 deletions fast/gpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

# Local driver for the GPU benchmark sweep: runs each benchmark with Devito
# and with xDSL and prints the reported throughputs as CSV (quick sanity check).

# Devito settings shared by every run below:
#   DEVITO_PLATFORM=nvidiaX  the benchmark scripts switch on their GPU
#                            'par-tile' tuning when the platform is 'nvidiaX'.
#   DEVITO_LOGGING=BENCH     get_throughput parses the `section0` line of the
#                            run's output; presumably this logging level is
#                            what makes Devito print it - TODO confirm.
# NOTE(review): unlike slurm-jobs/gpu.slurm, DEVITO_ARCH and DEVITO_LANGUAGE
# are not set here; they are presumably expected from the caller's
# environment - confirm.
export DEVITO_PLATFORM=nvidiaX DEVITO_LOGGING=BENCH

# Run the command given as arguments and print only the throughput it reports.
#
# stdout and stderr of the command are merged, the first line containing
# `section0` is kept, and its 12th space-separated field is printed.
# Nothing is printed when no such line appears.
#
# For GPU computing, we currently measure only the compute part.
# That is, we exclude the data copying to and from the device.
get_throughput() {
    # "$@" (quoted) passes every argument through unchanged; the unquoted form
    # would re-split arguments containing spaces and expand glob characters.
    "$@" 2>&1 | grep section0 | head -n 1 | cut -d ' ' -f12
}

# Sweep all benchmark cases over the measured space orders and print one CSV
# row per (benchmark, space order) pair to stdout.
# The output copy-pastes nicely into Google Sheets.
printf '%s\n' 'bench_name,so,Devito,xDSL'

# 2D-only variant of the case list, kept for reference:
# for bench in "wave2d_b.py -d 8192 8192 --nt 1024" "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024"
for bench in \
    "wave2d_b.py -d 8192 8192 --nt 1024" \
    "wave3d_b.py -d 512 512 512 --nt 512" \
    "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024" \
    "diffusion_3D_wBCs.py -d 512 512 512 --nt 512"
do
    # The first word of a case is the benchmark file; it labels the CSV row.
    bench_name=${bench%% *}

    for so in 2 4 8
    do
        # $bench is deliberately unquoted: it has to word-split into the
        # script name followed by its command-line arguments.
        devito_tput=$(get_throughput python $bench -so "$so" --devito 1)
        xdsl_tput=$(get_throughput python $bench -so "$so" --xdsl 1)

        printf '%s,%s,%s,%s\n' "$bench_name" "$so" "$devito_tput" "$xdsl_tput"
    done
done
64 changes: 64 additions & 0 deletions fast/slurm-jobs/gpu.slurm
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash

# Slurm batch job: runs the Devito and xDSL GPU benchmarks through srun on a
# single GPU and prints the reported throughputs as CSV.

# Slurm job options (job-name, compute nodes, job time)
#SBATCH --job-name=Devito_GPU
#SBATCH --time=00:15:00
#SBATCH --nodes=1
#SBATCH --gres=gpu:1

# Project (budget) code, partition and QoS; replace d011 below with your own
# project code (e.g. t01)
#SBATCH --account=d011
#SBATCH --partition=gpu
#SBATCH --qos=short
# NOTE(review): the output path is relative to the submission directory;
# ./jobs-output presumably has to exist before submitting - confirm.
#SBATCH -o ./jobs-output/gpu.%j.out # STDOUT

# Shared project area holding the module files and the Devito checkout.
SHARED=/work/d011/d011/shared
module use "$SHARED/modules"

# Devito build/run configuration for the GPU runs.
export DEVITO_ARCH=nvc
export DEVITO_PLATFORM=nvidiaX
export DEVITO_LANGUAGE=openacc

module load sc-23

# The benchmark scripts are invoked by relative name below, so stop right away
# if their directory is unavailable instead of running from the wrong place.
cd "$SHARED/software/devito/fast" || {
    echo "gpu.slurm: cannot cd to $SHARED/software/devito/fast" >&2
    exit 1
}

# Propagate the cpus-per-task setting from script to srun commands
# By default, Slurm does not propagate this setting from the sbatch
# options to srun commands in the job script. If this is not done,
# process/thread pinning may be incorrect leading to poor performance
# NOTE(review): no --cpus-per-task option is requested above, so
# SLURM_CPUS_PER_TASK may be unset at this point - confirm.
export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK

# The benchmark scripts switch on their GPU 'par-tile' tuning when the
# platform is 'nvidiaX'.
# NOTE(review): this repeats the export made before `module load`; it is only
# needed if the sc-23 module changes DEVITO_PLATFORM - confirm and drop one.
export DEVITO_PLATFORM=nvidiaX
# get_throughput parses the `section0` line of each run's output; presumably
# this logging level is what makes Devito print it - TODO confirm.
export DEVITO_LOGGING=BENCH

# Run the command given as arguments and print only the throughput it reports.
#
# stdout and stderr of the command are merged, the first line containing
# `section0` is kept, and its 12th space-separated field is printed.
# Nothing is printed when no such line appears.
#
# For GPU computing, we currently measure only the compute part.
# That is, we exclude the data copying to and from the device.
get_throughput() {
    # "$@" (quoted) passes every argument through unchanged; the unquoted form
    # would re-split arguments containing spaces and expand glob characters.
    "$@" 2>&1 | grep section0 | head -n 1 | cut -d ' ' -f12
}

# Sweep all benchmark cases over the measured space orders and print one CSV
# row per (benchmark, space order) pair to stdout.
# The output copy-pastes nicely into Google Sheets.
printf '%s\n' 'bench_name,so,Devito,xDSL'

# 2D-only variant of the case list, kept for reference:
# for bench in "wave2d_b.py -d 8192 8192 --nt 1024" "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024"
for bench in \
    "wave2d_b.py -d 8192 8192 --nt 1024" \
    "wave3d_b.py -d 512 512 512 --nt 512" \
    "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024" \
    "diffusion_3D_wBCs.py -d 512 512 512 --nt 512"
do
    # The first word of a case is the benchmark file; it labels the CSV row.
    bench_name=${bench%% *}

    for so in 2 4 8
    do
        # $bench is deliberately unquoted: it has to word-split into the
        # script name followed by its command-line arguments.
        devito_tput=$(get_throughput srun python $bench -so "$so" --devito 1)
        xdsl_tput=$(get_throughput srun python $bench -so "$so" --xdsl 1)

        printf '%s,%s,%s,%s\n' "$bench_name" "$so" "$devito_tput" "$xdsl_tput"
    done
done
7 changes: 6 additions & 1 deletion fast/wave2d_b.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,14 @@
print("Init norm:", np.linalg.norm(u.data[:]))

if args.devito:
# To measure Devito at its best on GPU, we have to set the tile size manually
opt = None
if configuration['platform'].name == 'nvidiaX':
opt = ('advanced', {'par-tile': (32, 4, 8)})

# Run more with no sources now (Not supported in xdsl)
# op1 = Operator([stencil], name='DevitoOperator', subs=grid.spacing_map)
op1 = Operator([stencil], name='DevitoOperator')
op1 = Operator([stencil], name='DevitoOperator', opt=opt)
op1.apply(time=nt, dt=dt)

if len(shape) == 2 and args.plot:
Expand Down
7 changes: 6 additions & 1 deletion fast/wave3d_b.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,14 @@


if args.devito:
# To measure Devito at its best on GPU, we have to set the tile size manually
opt = None
if configuration['platform'].name == 'nvidiaX':
opt = ('advanced', {'par-tile': (32, 4, 8)})

# Run more with no sources now (Not supported in xdsl)
# op1 = Operator([stencil], name='DevitoOperator', subs=grid.spacing_map)
op1 = Operator([stencil], name='DevitoOperator')
op1 = Operator([stencil], name='DevitoOperator', opt=opt)
op1.apply(time=nt, dt=dt)

if len(shape) == 3 and args.plot:
Expand Down

0 comments on commit 69685e1

Please sign in to comment.