payu-org · jo-basevi · May 19, 2024 · May 3, 2024 · May 3, 2024 · May 3, 2024
diff --git a/payu/envmod.py b/payu/envmod.py
@@ -17,6 +17,22 @@
 DEFAULT_BASEPATH = '/opt/Modules'
 DEFAULT_VERSION = 'v4.3.0'
 
+# Guidance appended to the ValueError raised by setup_user_modules() when
+# `module avail` lists no modulefile for a module requested in config.yaml.
+MODULE_NOT_FOUND_HELP = """ To fix module not being found:
+- Check module name and version in config.yaml (listed under `modules: load:`)
+- If module is found in a module directory, ensure this path is listed in
+config.yaml under `modules: use:`, or run `module use` command prior to running
+payu commands.
+"""
+
+# Guidance appended to the ValueError raised by setup_user_modules() when
+# `module avail` lists more than one modulefile for a requested module.
+MULTIPLE_MODULES_HELP = """ To fix having multiple modules available:
+- Add version to the module in config.yaml (under `modules: load:`)
+- Modify module directories in config.yaml (under `modules: use:`)
+- Or modify module directories in user environment by using module use/unuse
+commands, e.g.:
+    $ module use dir # Add dir to $MODULEPATH
+    $ module unuse dir # Remove dir from $MODULEPATH
+"""
+
 
 def setup(basepath=DEFAULT_BASEPATH):
     """Set the environment modules used by the Environment Module system."""
@@ -109,4 +125,79 @@ def lib_update(required_libs, lib_name):
             return '{0}/{1}'.format(mod_name, mod_version)
 
     # If there are no libraries, return an empty string
-    return ''
+    return ''
+
+
+def setup_user_modules(user_modules, user_modulepaths):
+    """Run `module use` and `module load` for user-defined modules.
+
+    Each directory in ``user_modulepaths`` is added with ``module use``.
+    Each entry of ``user_modules`` is then looked up with
+    ``module avail --terse`` and loaded once it resolves unambiguously:
+    either exactly one modulefile is listed, or exactly one listed
+    modulefile is an exact match for the requested name.
+
+    If ``MODULESHOME`` is not set in the environment, a warning is printed
+    and no module commands are run.
+
+    Raises:
+        ValueError: If a module directory does not exist, if no modulefile
+            is found for a module, or if several modulefiles are available
+            and the requested name does not match exactly one of them.
+    """
+
+    if 'MODULESHOME' not in os.environ:
+        print(
+            'payu: warning: No Environment Modules found; ' +
+            'skipping running module use/load commands for any module ' +
+            'directories/modulefiles defined in config.yaml')
+        return
+
+    # Add user-defined directories to MODULEPATH
+    for modulepath in user_modulepaths:
+        if not os.path.isdir(modulepath):
+            raise ValueError(
+                f"Module directory is not found: {modulepath}" +
+                "\n Check paths listed under `modules: use:` in config.yaml")
+
+        module('use', modulepath)
+
+    for modulefile in user_modules:
+        # The listing of available modulefiles is read from stderr
+        module_subcommand = f"avail --terse {modulefile}"
+        output = run_cmd(module_cmd(module_subcommand)).stderr
+
+        # Extract the modulefiles available: skip blank lines and the
+        # "/path/to/modulefiles:" directory headers, and drop a trailing
+        # "(...)" tag such as "(default)" so names compare cleanly
+        modules = []
+        for line in output.strip().splitlines():
+            line = line.strip()
+            if not line or (line.startswith('/') and line.endswith(':')):
+                continue
+            modules.append(line.rsplit('(', 1)[0])
+
+        if not modules:
+            raise ValueError(
+                f"Module is not found: {modulefile}\n{MODULE_NOT_FOUND_HELP}"
+            )
+
+        # Modules are used for finding model executable paths - so the
+        # module must be unique. `avail` can list longer names that merely
+        # start with the requested one (e.g. mod/1.0 and mod/1.0.1), so a
+        # single exact match is accepted.
+        # TODO: Could be a warning rather than an error?
+        if len(modules) > 1 and modules.count(modulefile) != 1:
+            raise ValueError(
+                f"There are multiple modules available for {modulefile}:\n" +
+                f"{output}\n{MULTIPLE_MODULES_HELP}")
+
+        # Load module
+        module('load', modulefile)
+
+
+def env_var_set_by_modules(user_modules, env_var):
+    """Return the value of ``env_var`` after loading only the user modules.
+
+    A `module purge` followed by a `module load` for each entry of
+    ``user_modules`` is evaluated inside a subprocess shell, and the
+    variable is echoed at the end; the last line of the captured stdout is
+    returned. This is used for getting $PATH for searching for the model
+    executable.
+
+    Returns None (after printing a warning) when ``MODULESHOME`` is not set.
+    Raises subprocess.CalledProcessError if the shell command fails.
+    """
+    if os.environ.get('MODULESHOME') is None:
+        print('payu: warning: No Environment Modules found; skipping '
+              f'inspecting user module changes to ${env_var}')
+        return
+
+    # Note: a subprocess shell is used so that the purge/load commands do
+    # not alter the environment of the current process
+    subcommands = ['purge']
+    subcommands.extend(f'load {name}' for name in user_modules)
+
+    shell_steps = [f"eval `{module_cmd(sub)}`" for sub in subcommands]
+    shell_steps.append(f'echo ${env_var}')
+
+    result = run_cmd(' && '.join(shell_steps))
+    result.check_returncode()
+
+    # The echoed variable is the final line of stdout
+    return result.stdout.strip().split('\n')[-1]
+
+
+def module_cmd(command):
+    """Build the `modulecmd` command line for a module subcommand.
+
+    The result invokes ``$MODULESHOME/bin/modulecmd`` for the bash shell
+    with ``command`` appended. Raises KeyError if MODULESHOME is not set.
+    """
+    modules_home = os.environ['MODULESHOME']
+    return '{}/bin/modulecmd bash {}'.format(modules_home, command)
+
+
+def run_cmd(command):
+    """Run ``command`` through the shell and return the CompletedProcess.
+
+    Both stdout and stderr are captured and decoded as text. The return
+    code is not checked here; callers inspect the result themselves.
+    """
+    run_options = {'shell': True, 'text': True, 'capture_output': True}
+    return subprocess.run(command, **run_options)
diff --git a/payu/experiment.py b/payu/experiment.py
@@ -132,6 +132,8 @@ def __init__(self, lab, reproduce=False, force=False):
 
         self.run_id = None
 
+        self.user_modules_path = None
+
     def init_models(self):
 
         self.model_name = self.config.get('model')
@@ -220,9 +222,30 @@ def set_stacksize(self, stacksize):
         resource.setrlimit(resource.RLIMIT_STACK,
                            (stacksize, resource.RLIM_INFINITY))
 
-    def load_modules(self):
-        # NOTE: This function is increasingly irrelevant, and may be removable.
+    def setup_modules(self):
+        """Set up modules and record what the user-defined modules provide.
+
+        Reads the `modules: use:` and `modules: load:` lists from the
+        experiment config, runs the matching module use/load commands, and
+        then stores:
+
+        - ``self.user_modules_path``: the value of $PATH after loading only
+          the user modules, or None when it could not be inspected.
+        - ``self.loaded_user_modules``: the entries of $LOADEDMODULES after
+          loading only the user modules, as a list. It is an empty list
+          when the value could not be inspected.
+        """
+        envmod.setup()
+
+        # Get user modules info from config
+        modules_config = self.config.get('modules', {})
+        self.user_modulepaths = modules_config.get('use', [])
+        self.user_modules = modules_config.get('load', [])
+
+        # Run module use + load commands for user-defined modules
+        envmod.setup_user_modules(self.user_modules, self.user_modulepaths)
+
+        # Get paths and loaded modules post loading only the user modules
+        self.user_modules_path = envmod.env_var_set_by_modules(
+            self.user_modules, 'PATH'
+        )
+
+        # Store list of all modules loaded by user-modules. This is always
+        # a list (never None) because load_modules() tests membership with
+        # `mod not in self.loaded_user_modules`, which fails on None.
+        loaded_modules = envmod.env_var_set_by_modules(
+            self.user_modules, 'LOADEDMODULES'
+        )
+        self.loaded_user_modules = (
+            [] if loaded_modules is None else loaded_modules.split(':')
+        )
 
+    def load_modules(self):
         # Scheduler
         sched_modname = self.config.get('scheduler', 'pbs')
         self.modules.add(sched_modname)
@@ -245,18 +268,14 @@ def load_modules(self):
             if len(mod) > 0:
                 print('mod '+mod)
                 mod_base = mod.split('/')[0]
-                if mod_base not in core_modules:
+                if (mod_base not in core_modules and
+                        mod not in self.loaded_user_modules):
                     envmod.module('unload', mod)
 
         # Now load model-dependent modules
         for mod in self.modules:
             envmod.module('load', mod)
 
-        # User-defined modules
-        user_modules = self.config.get('modules', {}).get('load', [])
-        for mod in user_modules:
-            envmod.module('load', mod)
-
         envmod.module('list')
 
         for prof in self.profilers:
@@ -414,6 +433,11 @@ def setup(self, force_archive=False):
 
         make_symlink(self.work_path, self.work_sym_path)
 
+        # Set up executable paths - first search through paths added by modules
+        self.setup_modules()
+        for model in self.models:
+            model.setup_executable_paths()
+
         # Set up all file manifests
         self.manifest.setup()
 
@@ -453,13 +477,6 @@ def setup(self, force_archive=False):
             self.get_restarts_to_prune()
 
     def run(self, *user_flags):
-        # XXX: This was previously done in reversion
-        envmod.setup()
-
-        # Add any user-defined module dir(s) to MODULEPATH
-        for module_dir in self.config.get('modules', {}).get('use', []):
-            envmod.module('use', module_dir)
-
         self.load_modules()
 
         f_out = open(self.stdout_fname, 'w')
@@ -804,6 +821,9 @@ def archive(self, force_prune_restarts=False):
             self.postprocess()
 
     def collate(self):
+        # Setup modules - load user-defined modules
+        self.setup_modules()
+
         for model in self.models:
             model.collate()
 

diff --git a/payu/models/fms.py b/payu/models/fms.py
@@ -76,8 +76,7 @@ def fms_collate(model):
     mpi = collate_config.get('mpi', False)
 
     if mpi:
-        # Must use envmod to be able to load mpi modules for collation
-        envmod.setup()
+        # Load mpi modules for collation
         model.expt.load_modules()
         default_exe = 'mppnccombine-fast'
     else:
@@ -92,8 +91,7 @@ def fms_collate(model):
                 mppnc_path = os.path.join(model.expt.lab.bin_path, f)
                 break
     else:
-        if not os.path.isabs(mppnc_path):
-            mppnc_path = os.path.join(model.expt.lab.bin_path, mppnc_path)
+        mppnc_path = model.expand_executable_path(mppnc_path)
 
     assert mppnc_path, 'No mppnccombine program found'
 

diff --git a/payu/models/model.py b/payu/models/model.py
@@ -82,21 +82,6 @@ def set_model_pathnames(self):
         self.work_output_path = self.work_path
         self.work_init_path = self.work_path
 
-        self.exec_prefix = self.config.get('exe_prefix', '')
-        self.exec_name = self.config.get('exe', self.default_exec)
-        if self.exec_name:
-            # By default os.path.join will not prepend the lab bin_path
-            # to an absolute path
-            self.exec_path = os.path.join(self.expt.lab.bin_path,
-                                          self.exec_name)
-        else:
-            self.exec_path = None
-        if self.exec_path:
-            # Make exec_name consistent for models with fully qualified path.
-            # In all cases it will just be the name of the executable without a
-            # path
-            self.exec_name = os.path.basename(self.exec_path)
-
     def set_local_pathnames(self):
 
         # This is the path relative to the control directory, required for
@@ -129,12 +114,6 @@ def set_local_pathnames(self):
                 os.path.relpath(self.work_init_path, self.expt.work_path)
             )
         )
-        if self.exec_path:
-            # Local path in work directory
-            self.exec_path_local = os.path.join(
-                self.work_path_local,
-                os.path.basename(self.exec_path)
-            )
 
     def set_input_paths(self):
         if len(self.expt.models) == 1:
@@ -198,6 +177,59 @@ def get_prior_restart_files(self):
             print("No prior restart files found: {error}".format(error=str(e)))
             return []
 
+    def expand_executable_path(self, exec):
+        """Given an executable, return the expanded executable path.
+
+        Resolution order:
+
+        1. An absolute ``exec`` is returned unchanged.
+        2. Each directory of ``self.expt.user_modules_path`` (a
+           ':'-separated $PATH value set by user modules) is searched for
+           an executable file named ``exec``; a single hit is returned.
+        3. Otherwise ``exec`` is joined onto the lab bin path.
+
+        A warning is printed when ``self.expt.user_modules_path`` is None,
+        and the search in step 2 is skipped.
+
+        Raises:
+            ValueError: If the executable is found at more than one
+                distinct path among the module-added directories.
+        """
+        # Check if exe is already an absolute path
+        if os.path.isabs(exec):
+            return exec
+
+        # Check if path set by loading user modules has been defined
+        module_added_path = self.expt.user_modules_path
+        if module_added_path is None:
+            print("payu: warning: Skipping searching for model executable " +
+                  "in $PATH set by user modules")
+
+        if module_added_path:
+            module_added_paths = module_added_path.split(':')
+        else:
+            module_added_paths = []
+
+        # Search for exe inside paths added to $PATH by user-defined modules
+        exec_paths = []
+        for path in module_added_paths:
+            exec_path = os.path.join(path, exec)
+
+            # $PATH can list the same directory more than once; count each
+            # candidate only once so it is not reported as ambiguous
+            if exec_path in exec_paths:
+                continue
+
+            # Require a regular file: a directory also passes an X_OK check
+            if os.path.isfile(exec_path) and os.access(exec_path, os.X_OK):
+                exec_paths.append(exec_path)
+
+        if len(exec_paths) > 1:
+            raise ValueError(
+                f"Executable {exec} found in multiple $PATH paths added by " +
+                f"user-defined modules in `config.yaml`. Paths: {exec_paths}")
+        elif len(exec_paths) == 1:
+            return exec_paths[0]
+
+        # Else prepend the lab bin path to exec
+        return os.path.join(self.expt.lab.bin_path, exec)
+
+    def setup_executable_paths(self):
+        """Resolve and store the model's executable prefix, name and paths.
+
+        ``exec_prefix`` and ``exec_name`` come from the `exe_prefix` and
+        `exe` config entries (``exec_name`` falls back to
+        ``self.default_exec``). When an executable name is configured,
+        ``exec_path`` is its expanded path, ``exec_name`` is reduced to the
+        file name only, and ``exec_path_local`` points at that file name
+        inside ``self.work_path_local``. Otherwise ``exec_path`` is None.
+        """
+        config = self.config
+        self.exec_prefix = config.get('exe_prefix', '')
+        self.exec_name = config.get('exe', self.default_exec)
+        self.exec_path = None
+
+        # Nothing to resolve when no executable is configured
+        if not self.exec_name:
+            return
+
+        self.exec_path = self.expand_executable_path(self.exec_name)
+
+        # Keep exec_name consistent for models configured with a fully
+        # qualified path: it is always just the executable's file name
+        exec_basename = os.path.basename(self.exec_path)
+        self.exec_name = exec_basename
+
+        # Local path in work directory
+        self.exec_path_local = os.path.join(
+            self.work_path_local, exec_basename
+        )
+
     def setup_configuration_files(self):
         """Copy configuration and optional configuration files from control
          path to work path"""
@@ -339,6 +371,7 @@ def collate(self):
         raise NotImplementedError
 
     def build_model(self):
+        self.setup_executable_paths()
 
         if not self.repo_url:
             return