Skip to content

Commit

Permalink
Merge pull request #2 from xylar/add_discover_machine_function
Browse files Browse the repository at this point in the history
 Break out `discover_machine()` as separate function
  • Loading branch information
xylar authored Sep 15, 2021
2 parents 0a58e0a + cad51ab commit f5eb0b9
Show file tree
Hide file tree
Showing 17 changed files with 346 additions and 41 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include mache/cime_machine_config/*.xml
include mache/machines/*.cfg
41 changes: 41 additions & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{% set name = "mache" %}
{% set version = "1.0.0" %}

package:
name: {{ name|lower }}
version: {{ version }}

source:
path: ..

build:
number: 0
script: {{ PYTHON }} -m pip install . --no-deps -vv
noarch: python

requirements:
host:
- python >=3.6
- pip
run:
- python >=3.6
- lxml

test:

imports:
- mache


about:
home: https://github.com/E3SM-Project/mache
license: BSD-3-Clause
license_family: BSD
license_file: LICENSE
summary: A package for providing configuration data related to E3SM supported machines
doc_url: https://github.com/E3SM-Project/mache/README.rst
dev_url: https://github.com/E3SM-Project/mache

extra:
recipe-maintainers:
- xylar
11 changes: 11 additions & 0 deletions conda/run_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python
# Smoke test for the conda package: import the public API and exercise it
# without asserting on any particular output.
from mache import MachineInfo, discover_machine

# Host-name based discovery; the result is not used further here.  On a host
# that matches no known prefix, discover_machine() returns None.
machine = discover_machine()

# Build and print the summary for an explicitly named machine, so that the
# test does not depend on which host it happens to run on.
machinfo = MachineInfo(machine='anvil')
print(machinfo)

# NOTE(review): presumably exercises the code path for a machine name that is
# not in the E3SM machine list -- confirm that a config exists for 'unknown'.
machinfo = MachineInfo(machine='unknown')
print(machinfo)

1 change: 1 addition & 0 deletions mache/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from mache.machine_info import MachineInfo
from mache.discover import discover_machine

__version_info__ = (1, 0, 0)
__version__ = '.'.join(str(vi) for vi in __version_info__)
37 changes: 37 additions & 0 deletions mache/discover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import socket
import warnings


# Ordered (host-name prefix, machine name) pairs consulted by
# discover_machine().  The first prefix that matches the host name wins.
_HOSTNAME_PREFIXES = (
    ('acme1', 'acme1'),
    ('andes', 'andes'),
    ('blueslogin', 'anvil'),
    ('ba-fe', 'badger'),
    ('chrlogin', 'chrysalis'),
    ('compy', 'compy'),
    ('cooley', 'cooley'),
    ('cori', 'cori-haswell'),
    ('gr-fe', 'grizzly'),
)


def discover_machine():
    """
    Figure out the machine from the host name

    Returns
    -------
    machine : str or None
        The name of the current machine, or ``None`` if the host name does
        not start with any of the known prefixes
    """
    hostname = socket.gethostname()
    for prefix, machine in _HOSTNAME_PREFIXES:
        if not hostname.startswith(prefix):
            continue
        if prefix == 'cori':
            # a cori host name maps to two machine names (cori-haswell and
            # cori-knl); we pick haswell and tell the user how to override
            warnings.warn('defaulting to cori-haswell.  Use -m cori-knl if you'
                          ' wish to run on KNL.')
        return machine
    # unknown host: callers decide whether this is an error
    return None
94 changes: 61 additions & 33 deletions mache/machine_info.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import socket
import warnings
from lxml import etree
from importlib.resources import path
import configparser
import os

from mache.discover import discover_machine


class MachineInfo:
"""
Expand Down Expand Up @@ -57,9 +57,11 @@ def __init__(self, machine=None):
The name of an E3SM supported machine. By default, the machine
will be inferred from the host name
"""
if machine is None:
machine = discover_machine()
if machine is None:
raise ValueError('Unable to discover machine form host name')
self.machine = machine
if self.machine is None:
self._discover_machine()

self.config = self._get_config()

Expand Down Expand Up @@ -88,14 +90,16 @@ def __str__(self):
"""

info = f'Machine: {self.machine}\n' \
f'E3SM Supported Machine? {self.e3sm_supported}'
f' E3SM Supported Machine: {self.e3sm_supported}'

if self.e3sm_supported:
info = f'{info}\n' \
f' Compilers: {", ".join(self.compilers)}\n' \
f' MPI libraries: {", ".join(self.mpilibs)}\n' \
f' OS: {self.os}'

info = f'{info}\n'

print_unified = (self.e3sm_unified_activation is not None or
self.e3sm_unified_base is not None or
self.e3sm_unified_mpi is not None)
Expand All @@ -115,6 +119,7 @@ def __str__(self):
if self.e3sm_unified_mpi is not None:
info = f'{info}\n' \
f' MPI type: {self.e3sm_unified_mpi}'
info = f'{info}\n'

print_diags = self.diagnostics_base is not None
if print_diags:
Expand All @@ -124,7 +129,17 @@ def __str__(self):
if self.diagnostics_base is not None:
info = f'{info}\n' \
f' Base path: {self.diagnostics_base}'
info = f'{info}\n'

info = f'{info}\n' \
f'Config options:'
for section in self.config.sections():
info = f'{info}\n' \
f' [{section}]'
for key, value in self.config.items(section):
info = f'{info}\n' \
f' {key} = {value}'
info = f'{info}\n'
return info

def get_modules_and_mpi_compilers(self, compiler, mpilib):
Expand Down Expand Up @@ -253,34 +268,45 @@ def get_modules_and_mpi_compilers(self, compiler, mpilib):

return mpicc, mpicxx, mpifc, mod_commands

def _discover_machine(self):
""" Figure out the machine from the host name """
if self.machine is not None:
return
hostname = socket.gethostname()
if hostname.startswith('acme1'):
machine = 'acme1'
elif hostname.startswith('andes'):
machine = 'andes'
elif hostname.startswith('blueslogin'):
machine = 'anvil'
elif hostname.startswith('ba-fe'):
machine = 'badger'
elif hostname.startswith('chrlogin'):
machine = 'chrysalis'
elif hostname.startswith('compy'):
machine = 'compy'
elif hostname.startswith('cooley'):
machine = 'cooley'
elif hostname.startswith('cori'):
warnings.warn('defaulting to cori-haswell. Use -m cori-knl if you'
' wish to run on KNL.')
machine = 'cori-haswell'
elif hostname.startswith('gr-fe'):
machine = 'grizzly'
def get_account_defaults(self):
    """
    Get default account, partition and quality of service (QOS) for
    this machine.

    Returns
    -------
    account : str
        The E3SM account on the machine, or ``None`` if the ``parallel``
        config section has no ``account`` option

    partition : str
        The default partition on the machine, or ``None`` if no partition
        should be specified

    qos : str
        The default quality of service on the machine, or ``None`` if no
        QOS should be specified
    """
    config = self.config

    def first_entry(option):
        """ First comma-separated entry of a [parallel] option, or None """
        if not config.has_option('parallel', option):
            return None
        # the option is a comma-separated list whose first entry is the
        # default
        return config.get('parallel', option).split(',')[0].strip()

    # the account is returned exactly as configured, not split on commas
    if config.has_option('parallel', 'account'):
        account = config.get('parallel', 'account')
    else:
        account = None

    partition = first_entry('partitions')
    qos = first_entry('qos')

    return account, partition, qos

def _get_config(self):
""" get a parser for config options """
Expand Down Expand Up @@ -310,11 +336,13 @@ def _parse_compilers_and_mpi(self):
machines = next(root.iter('config_machines'))

mach = None
found = False
for mach in machines:
if mach.tag == 'machine' and mach.attrib['MACH'] == machine:
found = True
break

if mach is None:
if not found:
# this is not an E3SM supported machine, so we're done
self.e3sm_supported = False
return
Expand Down
13 changes: 13 additions & 0 deletions mache/machines/acme1.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,16 @@ base_path = /usr/local/e3sm_unified/envs

# The base path to the diagnostics directory
base_path = /space2/diagnostics


# The parallel section describes options related to running jobs in parallel
[parallel]

# parallel system of execution: slurm, cobalt or single_node
system = single_node

# whether to use mpirun or srun to run a task
parallel_executable = mpirun

# cores per node on the machine
cores_per_node = 192
19 changes: 19 additions & 0 deletions mache/machines/andes.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,22 @@ base_path = /ccs/proj/cli900/sw/rhea/e3sm-unified

# The base path to the diagnostics directory
base_path = /gpfs/alpine/proj-shared/cli115/diagnostics/


# The parallel section describes options related to running jobs in parallel
[parallel]

# parallel system of execution: slurm, cobalt or single_node
system = slurm

# whether to use mpirun or srun to run a task
parallel_executable = srun

# cores per node on the machine
cores_per_node = 32

# account for running diagnostics jobs
account = cli115

# available partition(s) (default is the first)
partitions = batch
24 changes: 23 additions & 1 deletion mache/machines/anvil.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,26 @@ base_path = /lcrc/soft/climate/e3sm-unified
[diagnostics]

# The base path to the diagnostics directory
base_path = /lcrc/group/e3sm/diagnostics
base_path = /lcrc/group/e3sm/diagnostics


# The parallel section describes options related to running jobs in parallel
[parallel]

# parallel system of execution: slurm, cobalt or single_node
system = slurm

# whether to use mpirun or srun to run a task
parallel_executable = srun

# cores per node on the machine
cores_per_node = 36

# account for running diagnostics jobs
account = condo

# available partition(s) (default is the first)
partitions = acme-small, acme-medium, acme-large

# quality of service (default is the first)
qos = regular, acme_high
19 changes: 19 additions & 0 deletions mache/machines/badger.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,22 @@ base_path = /turquoise/usr/projects/climate/SHARED_CLIMATE/anaconda_envs

# The base path to the diagnostics directory
base_path = /turquoise/usr/projects/climate/SHARED_CLIMATE/diagnostic


# The parallel section describes options related to running jobs in parallel
[parallel]

# parallel system of execution: slurm, cobalt or single_node
system = slurm

# whether to use mpirun or srun to run a task
parallel_executable = srun

# cores per node on the machine
cores_per_node = 36

# account for running diagnostics jobs
account = e3sm

# quality of service (default is the first)
qos = regular, interactive
18 changes: 17 additions & 1 deletion mache/machines/chrysalis.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,20 @@ base_path = /lcrc/soft/climate/e3sm-unified
[diagnostics]

# The base path to the diagnostics directory
base_path = /lcrc/group/e3sm/diagnostics
base_path = /lcrc/group/e3sm/diagnostics


# The parallel section describes options related to running jobs in parallel
[parallel]

# parallel system of execution: slurm, cobalt or single_node
system = slurm

# whether to use mpirun or srun to run a task
parallel_executable = srun

# cores per node on the machine
cores_per_node = 64

# available partition(s) (default is the first)
partitions = debug, compute, high
Loading

0 comments on commit f5eb0b9

Please sign in to comment.