Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get module executables from path loaded by environment modules #439

Merged
merged 7 commits into from
May 19, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 71 additions & 1 deletion payu/envmod.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,74 @@ def lib_update(required_libs, lib_name):
return '{0}/{1}'.format(mod_name, mod_version)

# If there are no libraries, return an empty string
return ''
return ''


def paths_set_by_user_modules(user_modules, user_modulepaths):
    """Return the set of directories that user-defined modules add to $PATH.

    The result is used for searching for the model executable. Module
    commands are run in subprocess shells so that their changes are
    isolated from this process's environment; only MODULEPATH is modified
    here, and it is put back to its previous state before returning.

    Args:
        user_modules: Names of the modules to inspect.
        user_modulepaths: Module directories; MODULEPATH is temporarily
            restricted to exactly these while the modules are inspected.

    Returns:
        The $PATH entries seen after ``purge`` + ``load <module>`` for any
        available module, minus the entries seen after ``purge`` alone.
        An empty set if ``MODULESHOME`` is not in the environment.

    Raises:
        subprocess.CalledProcessError: If the baseline ``purge`` command
            fails. Failures while loading an individual module are
            skipped instead.
    """
    if 'MODULESHOME' not in os.environ:
        print('payu: warning: No Environment Modules found; skipping '
              'inspecting user module changes to PATH')
        return set()

    # Original environment
    previous_env = dict(os.environ)
    # MODULEPATH may be unset even when MODULESHOME is defined, so it is
    # read with .get() rather than indexed (which would raise KeyError)
    previous_modulepath = os.environ.get('MODULEPATH')

    # Restrict the module path to only the user-defined module paths
    os.environ['MODULEPATH'] = ':'.join(user_modulepaths)

    # Loop-invariant prefix of the availability check
    module_cmd = f"{os.environ['MODULESHOME']}/bin/modulecmd bash"

    # Note: Using subprocess shell to isolate changes to environment
    paths = []
    try:
        # Get $PATH paths with no modules loaded
        init_paths = paths_post_module_commands(["purge"])
        for module in user_modules:
            # Skip modules that are not available on the restricted path
            if run_cmd(f"{module_cmd} is-avail {module}").returncode != 0:
                continue
            # TODO: Check if multiple modules are available..
            try:
                # Get $PATH paths post running module purge && module load
                paths.extend(
                    paths_post_module_commands(['purge', f'load {module}'])
                )
            except subprocess.CalledProcessError:
                continue
    finally:
        # Put MODULEPATH back exactly as it was, including "not set"
        if previous_modulepath is None:
            os.environ.pop('MODULEPATH', None)
        else:
            os.environ['MODULEPATH'] = previous_modulepath

    if previous_env != os.environ:
        print(
            "Warning: Env vars changed when inspecting paths set by modules"
        )

    # Remove initial paths and convert into a set
    return set(paths).difference(init_paths)


def paths_post_module_commands(commands):
    """Run module commands in a subprocess shell and return the resulting PATH.

    Args:
        commands: Sequence of modulecmd sub-commands (for example
            ``'purge'`` or ``'load <module>'``), evaluated in order and
            chained with ``&&``. May be empty, in which case the shell's
            unmodified PATH is returned.

    Returns:
        List of the ``:``-separated entries of $PATH after the commands
        have been evaluated.

    Raises:
        subprocess.CalledProcessError: If the chained shell command exits
            with a non-zero status.
    """
    # Use modulecmd as module command is not available on compute nodes
    modulecmd = f"{os.environ['MODULESHOME']}/bin/modulecmd bash"
    shell_steps = [f"eval `{modulecmd} {c}`" for c in commands]

    # Quote $PATH so the shell echoes it verbatim instead of applying word
    # splitting and pathname expansion to it. Joining the echo as the final
    # step also keeps the command line valid when `commands` is empty.
    shell_steps.append('echo "$PATH"')
    command = ' && '.join(shell_steps)

    # Run command and check the output
    output = run_cmd(command)
    output.check_returncode()

    # Extract the PATH value - the echo is the final step, so it is the last
    # line of stdout - and split it into the individual paths
    path = output.stdout.strip().split('\n')[-1]
    return path.split(':')


def run_cmd(command):
    """Run ``command`` through the shell and return the completed process.

    stdout and stderr are captured as text on the returned
    ``subprocess.CompletedProcess``. The exit status is not checked here;
    callers inspect ``returncode`` or call ``check_returncode()``.
    """
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        shell=True,
    )
    return completed
46 changes: 34 additions & 12 deletions payu/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,29 @@ def set_stacksize(self, stacksize):
resource.setrlimit(resource.RLIMIT_STACK,
(stacksize, resource.RLIM_INFINITY))

def load_modules(self):
# NOTE: This function is increasingly irrelevant, and may be removable.
def setup_modules(self):
    """Set up envmod and record which paths the user modules add to $PATH.

    Stores the configured module directories in ``self.user_modulepaths``,
    the configured module names in ``self.user_modules``, and the result of
    ``envmod.paths_set_by_user_modules`` in ``self.user_modules_set_paths``.
    """
    envmod.setup()

    # Both settings live under the optional 'modules' section of the config
    modules_config = self.config.get('modules', {})
    self.user_modulepaths = modules_config.get('use', [])
    self.user_modules = modules_config.get('load', [])

    # Paths added to $PATH by the user modules
    added_paths = envmod.paths_set_by_user_modules(
        user_modules=self.user_modules,
        user_modulepaths=self.user_modulepaths,
    )
    self.user_modules_set_paths = added_paths

def run_modules(self):
    """Apply ``module use`` to each user module directory, then load modules.

    Reads ``self.user_modulepaths`` and finishes by calling
    ``self.load_modules()``.
    """
    # Extend MODULEPATH with every user-defined module directory first
    for user_modulepath in self.user_modulepaths:
        envmod.module('use', user_modulepath)

    self.load_modules()

def load_modules(self):
# Scheduler
sched_modname = self.config.get('scheduler', 'pbs')
self.modules.add(sched_modname)
Expand Down Expand Up @@ -253,8 +273,7 @@ def load_modules(self):
envmod.module('load', mod)

# User-defined modules
user_modules = self.config.get('modules', {}).get('load', [])
for mod in user_modules:
for mod in self.user_modules:
envmod.module('load', mod)

envmod.module('list')
Expand Down Expand Up @@ -414,6 +433,11 @@ def setup(self, force_archive=False):

make_symlink(self.work_path, self.work_sym_path)

# Set up executable paths - first search through paths added by modules
self.setup_modules()
for model in self.models:
model.setup_executable_paths()

# Set up all file manifests
self.manifest.setup()

Expand Down Expand Up @@ -453,14 +477,8 @@ def setup(self, force_archive=False):
self.get_restarts_to_prune()

def run(self, *user_flags):
# XXX: This was previously done in reversion
envmod.setup()

# Add any user-defined module dir(s) to MODULEPATH
for module_dir in self.config.get('modules', {}).get('use', []):
envmod.module('use', module_dir)

self.load_modules()
# Run module use and load commands
self.run_modules()

f_out = open(self.stdout_fname, 'w')
f_err = open(self.stderr_fname, 'w')
Expand Down Expand Up @@ -804,6 +822,10 @@ def archive(self, force_prune_restarts=False):
self.postprocess()

def collate(self):
# Search module added paths & run module use + load commands
self.setup_modules()
self.run_modules()

for model in self.models:
model.collate()

Expand Down
16 changes: 11 additions & 5 deletions payu/models/fms.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,16 @@ def fms_collate(model):
mpi = collate_config.get('mpi', False)

if mpi:
# Must use envmod to be able to load mpi modules for collation
envmod.setup()
model.expt.load_modules()
# TODO: I've added module setup and load functions to
# Experiment.collate - as user modules needed to be use & loaded
# to expand executable paths. However, is loading mpi module
# for non-mpi collate jobs going to cause errors?
# If so, need to only use/load user modules in Experiment.collate
# and only load additional modules for mpi here
aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved

# # Must use envmod to be able to load mpi modules for collation
# envmod.setup()
# model.expt.load_modules()
default_exe = 'mppnccombine-fast'
else:
default_exe = 'mppnccombine'
Expand All @@ -92,8 +99,7 @@ def fms_collate(model):
mppnc_path = os.path.join(model.expt.lab.bin_path, f)
break
else:
if not os.path.isabs(mppnc_path):
mppnc_path = os.path.join(model.expt.lab.bin_path, mppnc_path)
mppnc_path = model.expand_executable_path(mppnc_path)
aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved

assert mppnc_path, 'No mppnccombine program found'

Expand Down
71 changes: 50 additions & 21 deletions payu/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,6 @@ def set_model_pathnames(self):
self.work_output_path = self.work_path
self.work_init_path = self.work_path

self.exec_prefix = self.config.get('exe_prefix', '')
self.exec_name = self.config.get('exe', self.default_exec)
if self.exec_name:
# By default os.path.join will not prepend the lab bin_path
# to an absolute path
self.exec_path = os.path.join(self.expt.lab.bin_path,
self.exec_name)
else:
self.exec_path = None
if self.exec_path:
# Make exec_name consistent for models with fully qualified path.
# In all cases it will just be the name of the executable without a
# path
self.exec_name = os.path.basename(self.exec_path)

def set_local_pathnames(self):

# This is the path relative to the control directory, required for
Expand Down Expand Up @@ -129,12 +114,6 @@ def set_local_pathnames(self):
os.path.relpath(self.work_init_path, self.expt.work_path)
)
)
if self.exec_path:
# Local path in work directory
self.exec_path_local = os.path.join(
self.work_path_local,
os.path.basename(self.exec_path)
)

def set_input_paths(self):
if len(self.expt.models) == 1:
Expand Down Expand Up @@ -198,6 +177,54 @@ def get_prior_restart_files(self):
print("No prior restart files found: {error}".format(error=str(e)))
return []

def expand_executable_path(self, exec, search_module_path=True):
    """Given an executable, return the expanded executable path.

    Resolution order:
      1. An absolute ``exec`` is returned unchanged.
      2. If ``search_module_path`` is true, the paths in
         ``self.expt.user_modules_set_paths`` are searched for an
         executable file named ``exec``.
      3. Otherwise ``exec`` is joined onto ``self.expt.lab.bin_path``.

    Args:
        exec: Executable name or path.
        search_module_path: Whether to search the paths added to $PATH by
            user-defined modules before falling back to the lab bin path.

    Returns:
        The expanded executable path.

    Raises:
        ValueError: If ``exec`` is found in more than one of the paths
            added by user-defined modules.
    """
    # Check if exe is already an absolute path
    if os.path.isabs(exec):
        return exec

    if search_module_path:
        module_added_paths = self.expt.user_modules_set_paths
    else:
        module_added_paths = ()

    # Search for exe inside paths added to $PATH by user-defined modules.
    # Only regular files with execute permission count: a directory or a
    # non-executable file that happens to share the name is not a usable
    # executable. Sorting makes the order of reported paths deterministic.
    candidates = (
        os.path.join(path, exec) for path in sorted(module_added_paths)
    )
    exec_paths = [
        candidate for candidate in candidates
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK)
    ]

    if len(exec_paths) > 1:
        raise ValueError(
            f"Executable {exec} found in multiple $PATH paths added by " +
            f"user-defined modules in `config.yaml`. Paths: {exec_paths}")
    if exec_paths:
        return exec_paths[0]

    # Else prepend the lab bin path to exec
    return os.path.join(self.expt.lab.bin_path, exec)

def setup_executable_paths(self, search_module_paths=True):
    """Set the model's executable attributes from its config.

    Always sets ``exec_prefix``, ``exec_name`` and ``exec_path``. When an
    executable name is configured (or a default exists), ``exec_path`` is
    expanded via ``expand_executable_path``, ``exec_name`` is reduced to
    the bare file name, and ``exec_path_local`` is set to that file name
    under ``work_path_local``. Otherwise ``exec_path`` stays ``None`` and
    ``exec_path_local`` is not set.

    Args:
        search_module_paths: Passed on to ``expand_executable_path``.
    """
    self.exec_prefix = self.config.get('exe_prefix', '')

    configured_name = self.config.get('exe', self.default_exec)
    self.exec_name = configured_name
    self.exec_path = None

    # No executable configured: nothing more to resolve
    if not configured_name:
        return

    expanded_path = self.expand_executable_path(configured_name,
                                                search_module_paths)
    self.exec_path = expanded_path

    # Keep exec_name consistent for models configured with a fully
    # qualified path: it is always just the file name, without any path
    exec_basename = os.path.basename(expanded_path)
    self.exec_name = exec_basename

    # Local path in work directory
    self.exec_path_local = os.path.join(self.work_path_local, exec_basename)

def setup_configuration_files(self):
"""Copy configuration and optional configuration files from control
path to work path"""
Expand Down Expand Up @@ -339,6 +366,8 @@ def collate(self):
raise NotImplementedError

def build_model(self):
# Don't search user modules for executable paths
self.setup_executable_paths(search_module_paths=False)
aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved

if not self.repo_url:
return
Expand Down
Loading