forked from devitocodes/devito
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switch to separate batch files for thread numbers.
- Loading branch information
1 parent
16e0ae3
commit 1fc89fc
Showing
8 changed files
with
518 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/bin/bash | ||
|
||
# Slurm job options (job-name, compute nodes, job time) | ||
#SBATCH --job-name=Devito_OpenMP_1 | ||
#SBATCH --time=01:00:00 | ||
#SBATCH --nodes=1 | ||
#SBATCH --ntasks-per-node=1 | ||
#SBATCH --cpus-per-task=1 | ||
#SBATCH --switches=1@360 # Each group has 128 nodes | ||
|
||
# Replace [budget code] below with your project code (e.g. t01) | ||
#SBATCH --account=d011 | ||
#SBATCH --partition=standard | ||
#SBATCH --qos=standard | ||
#SBATCH -o ./jobs-output/openmp-1.%j.out # STDOUT | ||
|
||
# Propagate the cpus-per-task setting from script to srun commands | ||
# By default, Slurm does not propagate this setting from the sbatch | ||
# options to srun commands in the job script. If this is not done, | ||
# process/thread pinning may be incorrect leading to poor performance | ||
export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK | ||
|
||
export SHARED=/work/d011/d011/shared | ||
module use $SHARED/modules | ||
module load sc-23 | ||
module load cray-mpich | ||
|
||
|
||
cd $SHARED/software/devito/fast | ||
# Simple script to run multithreaded benchmarks locally, | ||
# for simple sanity checks. | ||
|
||
# We want one thread per physical core | ||
export OMP_PLACES=cores | ||
|
||
# Devito-specific env variables | ||
export DEVITO_ARCH=cray | ||
export DEVITO_LANGUAGE=openmp | ||
export DEVITO_LOGGING=BENCH | ||
unset DEVITO_MPI | ||
export DEVITO_AUTOTUNING=aggressive | ||
|
||
export OMP_PROC_BIND=true | ||
|
||
export CRAY_OMP_CHECK_AFFINITY=TRUE | ||
export SLURM_CPU_FREQ_REQ=2250000 | ||
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK | ||
|
||
# Just extract the reported throughput from the whole output of the passed command | ||
get_throughput() { | ||
# echo $($@) | ||
$@ |& grep Global | head -n 1 | cut -d ' ' -f6 | ||
} | ||
|
||
# Iterate over benchmarks and cases, print simple CSV data to stdout | ||
# Copy-pastes nicely in Google Sheets | ||
echo bench_name,so,Devito,xDSL | ||
for bench in "wave2d_b.py -d 8192 8192 --nt 1024" "wave3d_b.py -d 512 512 512 --nt 512" "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024" "diffusion_3D_wBCs.py -d 512 512 512 --nt 512" | ||
do | ||
# Get the benchmark file for printing | ||
bench_name=$(echo $bench | cut -d ' ' -f1) | ||
# Iterate over measured space orders | ||
for so in 2 4 8 | ||
do | ||
|
||
# Get the throughputs | ||
devito_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --devito 1) | ||
xdsl_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --xdsl 1) | ||
# print CSV line | ||
echo $bench_name,$so,$devito_time,$xdsl_time | ||
done | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/bin/bash | ||
|
||
# Slurm job options (job-name, compute nodes, job time) | ||
#SBATCH --job-name=Devito_OpenMP_16 | ||
#SBATCH --time=01:00:00 | ||
#SBATCH --nodes=1 | ||
#SBATCH --ntasks-per-node=1 | ||
#SBATCH --cpus-per-task=16 | ||
#SBATCH --switches=1@360 # Each group has 128 nodes | ||
|
||
# Replace [budget code] below with your project code (e.g. t01) | ||
#SBATCH --account=d011 | ||
#SBATCH --partition=standard | ||
#SBATCH --qos=standard | ||
#SBATCH -o ./jobs-output/openmp-16.%j.out # STDOUT | ||
|
||
# Propagate the cpus-per-task setting from script to srun commands | ||
# By default, Slurm does not propagate this setting from the sbatch | ||
# options to srun commands in the job script. If this is not done, | ||
# process/thread pinning may be incorrect leading to poor performance | ||
export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK | ||
|
||
export SHARED=/work/d011/d011/shared | ||
module use $SHARED/modules | ||
module load sc-23 | ||
module load cray-mpich | ||
|
||
|
||
cd $SHARED/software/devito/fast | ||
# Simple script to run multithreaded benchmarks locally, | ||
# for simple sanity checks. | ||
|
||
# We want one thread per physical core | ||
export OMP_PLACES=cores | ||
|
||
# Devito-specific env variables | ||
export DEVITO_ARCH=cray | ||
export DEVITO_LANGUAGE=openmp | ||
export DEVITO_LOGGING=BENCH | ||
unset DEVITO_MPI | ||
export DEVITO_AUTOTUNING=aggressive | ||
|
||
export OMP_PROC_BIND=true | ||
|
||
export CRAY_OMP_CHECK_AFFINITY=TRUE | ||
export SLURM_CPU_FREQ_REQ=2250000 | ||
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK | ||
|
||
# Just extract the reported throughput from the whole output of the passed command | ||
get_throughput() { | ||
# echo $($@) | ||
$@ |& grep Global | head -n 1 | cut -d ' ' -f6 | ||
} | ||
|
||
# Iterate over benchmarks and cases, print simple CSV data to stdout | ||
# Copy-pastes nicely in Google Sheets | ||
echo bench_name,so,Devito,xDSL | ||
for bench in "wave2d_b.py -d 8192 8192 --nt 1024" "wave3d_b.py -d 512 512 512 --nt 512" "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024" "diffusion_3D_wBCs.py -d 512 512 512 --nt 512" | ||
do | ||
# Get the benchmark file for printing | ||
bench_name=$(echo $bench | cut -d ' ' -f1) | ||
# Iterate over measured space orders | ||
for so in 2 4 8 | ||
do | ||
|
||
# Get the throughputs | ||
devito_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --devito 1) | ||
xdsl_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --xdsl 1) | ||
# print CSV line | ||
echo $bench_name,$so,$devito_time,$xdsl_time | ||
done | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/bin/bash | ||
|
||
# Slurm job options (job-name, compute nodes, job time) | ||
#SBATCH --job-name=Devito_OpenMP_2 | ||
#SBATCH --time=01:00:00 | ||
#SBATCH --nodes=1 | ||
#SBATCH --ntasks-per-node=2 | ||
#SBATCH --cpus-per-task=1 | ||
#SBATCH --switches=1@360 # Each group has 128 nodes | ||
|
||
# Replace [budget code] below with your project code (e.g. t01) | ||
#SBATCH --account=d011 | ||
#SBATCH --partition=standard | ||
#SBATCH --qos=standard | ||
#SBATCH -o ./jobs-output/openmp.%j.out # STDOUT | ||
|
||
# Propagate the cpus-per-task setting from script to srun commands | ||
# By default, Slurm does not propagate this setting from the sbatch | ||
# options to srun commands in the job script. If this is not done, | ||
# process/thread pinning may be incorrect leading to poor performance | ||
export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK | ||
|
||
export SHARED=/work/d011/d011/shared | ||
module use $SHARED/modules | ||
module load sc-23 | ||
module load cray-mpich | ||
|
||
|
||
cd $SHARED/software/devito/fast | ||
# Simple script to run multithreaded benchmarks locally, | ||
# for simple sanity checks. | ||
|
||
# We want one thread per physical core | ||
export OMP_PLACES=cores | ||
|
||
# Devito-specific env variables | ||
export DEVITO_ARCH=cray | ||
export DEVITO_LANGUAGE=openmp | ||
export DEVITO_LOGGING=BENCH | ||
unset DEVITO_MPI | ||
export DEVITO_AUTOTUNING=aggressive | ||
|
||
export OMP_PROC_BIND=true | ||
|
||
export CRAY_OMP_CHECK_AFFINITY=TRUE | ||
export SLURM_CPU_FREQ_REQ=2250000 | ||
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK | ||
|
||
# Just extract the reported throughput from the whole output of the passed command | ||
get_throughput() { | ||
# echo $($@) | ||
$@ |& grep Global | head -n 1 | cut -d ' ' -f6 | ||
} | ||
|
||
# Iterate over benchmarks and cases, print simple CSV data to stdout | ||
# Copy-pastes nicely in Google Sheets | ||
echo bench_name,so,Devito,xDSL | ||
for bench in "wave2d_b.py -d 8192 8192 --nt 1024" "wave3d_b.py -d 512 512 512 --nt 512" "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024" "diffusion_3D_wBCs.py -d 512 512 512 --nt 512" | ||
do | ||
# Get the benchmark file for printing | ||
bench_name=$(echo $bench | cut -d ' ' -f1) | ||
# Iterate over measured space orders | ||
for so in 2 4 8 | ||
do | ||
|
||
# Get the throughputs | ||
devito_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --devito 1) | ||
xdsl_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --xdsl 1) | ||
# print CSV line | ||
echo $bench_name,$so,$devito_time,$xdsl_time | ||
done | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/bin/bash | ||
|
||
# Slurm job options (job-name, compute nodes, job time) | ||
#SBATCH --job-name=Devito_OpenMP_32 | ||
#SBATCH --time=01:00:00 | ||
#SBATCH --nodes=1 | ||
#SBATCH --ntasks-per-node=32 | ||
#SBATCH --cpus-per-task=1 | ||
#SBATCH --switches=1@360 # Each group has 128 nodes | ||
|
||
# Replace [budget code] below with your project code (e.g. t01) | ||
#SBATCH --account=d011 | ||
#SBATCH --partition=standard | ||
#SBATCH --qos=standard | ||
#SBATCH -o ./jobs-output/openmp-32.%j.out # STDOUT | ||
|
||
# Propagate the cpus-per-task setting from script to srun commands | ||
# By default, Slurm does not propagate this setting from the sbatch | ||
# options to srun commands in the job script. If this is not done, | ||
# process/thread pinning may be incorrect leading to poor performance | ||
export SRUN_CPUS_PER_TASK=$SLURM_CPUS_PER_TASK | ||
|
||
export SHARED=/work/d011/d011/shared | ||
module use $SHARED/modules | ||
module load sc-23 | ||
module load cray-mpich | ||
|
||
|
||
cd $SHARED/software/devito/fast | ||
# Simple script to run multithreaded benchmarks locally, | ||
# for simple sanity checks. | ||
|
||
# We want one thread per physical core | ||
export OMP_PLACES=cores | ||
|
||
# Devito-specific env variables | ||
export DEVITO_ARCH=cray | ||
export DEVITO_LANGUAGE=openmp | ||
export DEVITO_LOGGING=BENCH | ||
unset DEVITO_MPI | ||
export DEVITO_AUTOTUNING=aggressive | ||
|
||
export OMP_PROC_BIND=true | ||
|
||
export CRAY_OMP_CHECK_AFFINITY=TRUE | ||
export SLURM_CPU_FREQ_REQ=2250000 | ||
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK | ||
|
||
# Just extract the reported throughput from the whole output of the passed command | ||
get_throughput() { | ||
# echo $($@) | ||
$@ |& grep Global | head -n 1 | cut -d ' ' -f6 | ||
} | ||
|
||
# Iterate over benchmarks and cases, print simple CSV data to stdout | ||
# Copy-pastes nicely in Google Sheets | ||
echo bench_name,so,Devito,xDSL | ||
for bench in "wave2d_b.py -d 8192 8192 --nt 1024" "wave3d_b.py -d 512 512 512 --nt 512" "diffusion_2D_wBCs.py -d 8192 8192 --nt 1024" "diffusion_3D_wBCs.py -d 512 512 512 --nt 512" | ||
do | ||
# Get the benchmark file for printing | ||
bench_name=$(echo $bench | cut -d ' ' -f1) | ||
# Iterate over measured space orders | ||
for so in 2 4 8 | ||
do | ||
|
||
# Get the throughputs | ||
devito_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --devito 1) | ||
xdsl_time=$(get_throughput srun --distribution=block:block --hint=nomultithread python $bench -so $so --xdsl 1) | ||
# print CSV line | ||
echo $bench_name,$so,$devito_time,$xdsl_time | ||
done | ||
done |
Oops, something went wrong.