Skip to content

Commit

Permalink
Merge pull request #857 from xylar/update-to-1.5.0-alpha.1
Browse files Browse the repository at this point in the history
Update to 1.5.0-alpha.1
  • Loading branch information
xylar authored Sep 20, 2024
2 parents 869dafe + 34aa147 commit 6ff63b5
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 15 deletions.
9 changes: 8 additions & 1 deletion compass/job/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ def write_job_script(config, machine, target_cores, min_cores, work_dir,
job_name = 'compass'
else:
job_name = f'compass_{suite}'

if config.has_option('parallel', 'gpus_per_node'):
gpus_per_node = config.get('parallel', 'gpus_per_node')
else:
gpus_per_node = ''

wall_time = config.get('job', 'wall_time')

template = Template(resources.read_text(
Expand All @@ -101,7 +107,8 @@ def write_job_script(config, machine, target_cores, min_cores, work_dir,
text = template.render(job_name=job_name, account=account,
nodes=f'{nodes}', wall_time=wall_time, qos=qos,
partition=partition, constraint=constraint,
reservation=reservation, suite=suite,
reservation=reservation,
gpus_per_node=gpus_per_node, suite=suite,
pre_run_commands=pre_run_commands,
post_run_commands=post_run_commands)
text = _clean_up_whitespace(text)
Expand Down
3 changes: 3 additions & 0 deletions compass/job/job_script.template
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
{% if constraint != '' -%}
#SBATCH --constraint={{ constraint }}
{%- endif %}
{% if gpus_per_node != '' -%}
#SBATCH --gpus-per-node={{ gpus_per_node }}
{%- endif %}

source load_compass_env.sh
{{ pre_run_commands }}
Expand Down
44 changes: 44 additions & 0 deletions compass/machines/pm-gpu.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

# The paths section describes paths that are used within the ocean core test
# cases.
[paths]

# A shared root directory where MPAS standalone data can be found
database_root = /global/cfs/cdirs/e3sm/mpas_standalonedata

# the path to the base conda environment where compass environments have
# been created
compass_envs = /global/common/software/e3sm/compass/pm-gpu/base


# Options related to deploying a compass conda environment on supported
# machines
[deploy]

# the compiler set to use for system libraries and MPAS builds
compiler = gnugpu

# the system MPI library to use for the gnugpu compiler
mpi_gnugpu = mpich

# the system MPI library to use for the nvidiagpu compiler
mpi_nvidiagpu = mpich

# the base path for spack environments used by compass
spack = /global/cfs/cdirs/e3sm/software/compass/pm-gpu/spack

# whether to use the same modules for hdf5, netcdf-c, netcdf-fortran and
# pnetcdf as E3SM (spack modules are used otherwise)
use_e3sm_hdf5_netcdf = True

# The parallel section describes options related to running jobs in parallel.
# Most options in this section come from mache so here we just add or override
# some defaults
[parallel]

# cores per node on the machine
cores_per_node = 64

# threads per core (set to 1 because hyperthreading seems to cause hangs on
# Perlmutter)
threads_per_core = 1
5 changes: 5 additions & 0 deletions compass/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ def get_available_parallel_resources(config):
cores_per_node=cores_per_node,
mpi_allowed=mpi_allowed
)

if config.has_option('parallel', 'gpus_per_node'):
available_resources['gpus_per_node'] = \
config.getint('parallel', 'gpus_per_node')

return available_resources


Expand Down
2 changes: 1 addition & 1 deletion compass/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.4.0-alpha.7'
__version__ = '1.5.0-alpha.1'
1 change: 1 addition & 0 deletions conda/albany_supported.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
chicoma-cpu, gnu, mpich
chrysalis, gnu, openmpi
pm-cpu, gnu, mpich
pm-gpu, gnugpu, mpich
morpheus, gnu, openmpi
18 changes: 14 additions & 4 deletions conda/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,9 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901
scorpio = config.get('deploy', 'scorpio')
parallelio = config.get('deploy', 'parallelio')

# for now, we'll assume CUDA is needed anytime GPUs are present
with_cuda = config.has_option('parallel', 'gpus_per_node')

if config.has_option('deploy', 'spack_mirror'):
spack_mirror = config.get('deploy', 'spack_mirror')
else:
Expand Down Expand Up @@ -536,8 +539,14 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901
f'@{parallelio}+pnetcdf~timing"')

if albany != 'None':
specs.append(f'"trilinos-for-albany@{albany}"')
specs.append(f'"albany@{albany}+mpas~py+unit_tests"')
if with_cuda:
albany_cuda = '+cuda+uvm+sfad sfadsize=12'
trilinos_cuda = '+cuda+uvm'
else:
albany_cuda = ''
trilinos_cuda = ''
specs.append(f'"trilinos-for-albany@{albany}{trilinos_cuda}"')
specs.append(f'"albany@{albany}+mpas~py+unit_tests{albany_cuda}"')

yaml_template = f'{spack_template_path}/{machine}_{compiler}_{mpi}.yaml'
if not os.path.exists(yaml_template):
Expand Down Expand Up @@ -1082,8 +1091,9 @@ def main(): # noqa: C901
print('Install local mache\n')
commands = f'source {conda_base}/etc/profile.d/conda.sh && ' \
f'conda activate {conda_env_name} && ' \
'cd ../build_mache/mache && ' \
'python -m pip install --no-deps .'
f'cd ../build_mache/mache && ' \
f'conda install -y --file spec-file.txt && ' \
f'python -m pip install --no-deps .'
check_call(commands, logger=logger)

previous_conda_env = conda_env_name
Expand Down
6 changes: 3 additions & 3 deletions conda/compass_env/spec-file.template
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ ipython
jupyter
lxml
{% if include_mache %}
mache=1.23.0
mache=1.25.0
{% endif %}
matplotlib-base >=3.9.1
metis
Expand Down Expand Up @@ -49,8 +49,8 @@ cmake
cxx-compiler
fortran-compiler
libnetcdf=4.9.2={{ mpi_prefix }}_*
libpnetcdf=1.12.3={{ mpi_prefix }}_*
parallelio=2.6.2={{ mpi_prefix }}_*
libpnetcdf=1.13.0={{ mpi_prefix }}_*
parallelio=2.6.3={{ mpi_prefix }}_*
m4
make
{{ mpi }}
Expand Down
3 changes: 2 additions & 1 deletion conda/configure_compass_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def main():
if local_mache:
mache = ''
else:
mache = '"mache=1.23.0"'
mache = '"mache=1.25.0"'

setup_install_env(env_name, activate_base, args.use_local, logger,
args.recreate, conda_base, mache)
Expand All @@ -114,6 +114,7 @@ def main():
f'git clone -b {args.mache_branch} ' \
f'[email protected]:{args.mache_fork}.git mache && ' \
f'cd mache && ' \
f'conda install -y --file spec-file.txt && ' \
f'python -m pip install --no-deps .'

check_call(commands, logger=logger)
Expand Down
8 changes: 4 additions & 4 deletions conda/default.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ lapack = 3.9.1
metis = 5.1.0
moab = 5.5.1
netcdf_c = 4.9.2
netcdf_fortran = 4.6.0
netcdf_fortran = 4.6.1
petsc = 3.19.1
pnetcdf = 1.12.3
scorpio = 1.6.3
# parallelio = 2.6.2
pnetcdf = 1.13.0
scorpio = 1.6.5
# parallelio = 2.6.3
parallelio = None
3 changes: 2 additions & 1 deletion conda/unsupported.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ compy, pgi, mvapich2
pm-cpu, nvidia, mpich
pm-cpu, aocc, mpich
pm-cpu, amdclang, mpich

pm-gpu, gnu, mpich
pm-gpu, nvidia, mpich

# compiles but tests unreliable (errors or hanging),
# see https://github.com/MPAS-Dev/compass/issues/336
Expand Down

0 comments on commit 6ff63b5

Please sign in to comment.