Add payu test suite for spatial configuration

Spatial tests use the [payu framework][payu]. The payu framework was chosen so that we:

- Encourage uptake of payu amongst users of CABLE
- Have the foundations in place for running coupled models (atmosphere + land) with payu
- Can easily test longer-running simulations (payu makes it easy to run a model multiple times and have state persist in the model via restart files)

The design of the spatial tests assumes each payu experiment is tailored to running CABLE with a specific meteorological forcing. This has the benefit that all the required inputs are already defined in the payu configuration file. An alternative would be to build up the spatial namelist configurations from scratch. This would be problematic as it is unclear if CABLE requires 'forcing specific' namelist options to be enabled to run with a particular met forcing. That is, CABLE does not allow for easy plug and play with different met forcings via the namelist file.

The run directory structure is organised as follows:

    runs/
    ├── spatial
    │   └── tasks
    │       ├── <spatial-task-name> (a payu control / experiment directory)
    │       └── ...
    ├── payu-laboratory
    │   └── ...
    └── fluxsite
        └── ...

Note we have a separate payu-laboratory directory. This is so we keep all CABLE outputs produced by benchcab under the bench_example work directory.

This change includes the following additional features:

- Add the ability to build the CABLE executable with MPI at runtime so that we run the spatial configurations with MPI.
- Add the `--mpi` flag to the `benchcab build` command so that the user can run the MPI build step independently.
- Add subcommands to run each step of the spatial workflow in isolation.
- Add a `payu` key in the benchcab config file so that users can easily configure payu experiments and add optional command line arguments to the `payu run` command.
- Add a `met_forcings` key to specify different met forcings and their respective payu experiment.
Fixes #5

[payu]: https://github.com/payu-org/payu
[cable_example]: https://github.com/CABLE-LSM/cable_example
CABLE-LSM · Dec 1, 2023 · 270162d · 270162d
1 parent ded19e5
commit 270162d
Show file tree

Hide file tree

Showing 20 changed files with 1,098 additions and 317 deletions.
diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py
@@ -10,24 +10,20 @@
 from subprocess import CalledProcessError
 from typing import Optional
 
-from benchcab import internal
+from benchcab import fluxsite, internal, spatial
 from benchcab.comparison import run_comparisons, run_comparisons_in_parallel
 from benchcab.config import read_config
 from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
-from benchcab.fluxsite import (
-    Task,
-    get_fluxsite_comparisons,
-    get_fluxsite_tasks,
-    run_tasks,
-    run_tasks_in_parallel,
-)
 from benchcab.internal import get_met_forcing_file_names
 from benchcab.model import Model
 from benchcab.utils.fs import mkdir, next_path
 from benchcab.utils.pbs import render_job_script
 from benchcab.utils.repo import SVNRepo, create_repo
 from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface
-from benchcab.workdir import setup_fluxsite_directory_tree
+from benchcab.workdir import (
+    setup_fluxsite_directory_tree,
+    setup_spatial_directory_tree,
+)
 
 
 class Benchcab:
@@ -47,7 +43,8 @@ def __init__(
 
         self._config: Optional[dict] = None
         self._models: list[Model] = []
-        self.tasks: list[Task] = []  # initialise fluxsite tasks lazily
+        self._fluxsite_tasks: list[fluxsite.FluxsiteTask] = []
+        self._spatial_tasks: list[spatial.SpatialTask] = []
 
     def _validate_environment(self, project: str, modules: list):
         """Performs checks on current user environment."""
@@ -114,20 +111,34 @@ def _get_models(self, config: dict) -> list[Model]:
                 self._models.append(Model(repo=repo, model_id=id, **sub_config))
         return self._models
 
-    def _initialise_tasks(self, config: dict) -> list[Task]:
-        """A helper method that initialises and returns the `tasks` attribute."""
-        self.tasks = get_fluxsite_tasks(
-            models=self._get_models(config),
-            science_configurations=config.get(
-                "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS
-            ),
-            fluxsite_forcing_file_names=get_met_forcing_file_names(
-                config.get("fluxsite", {}).get(
-                    "experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT
-                )
-            ),
-        )
-        return self.tasks
+    def _get_fluxsite_tasks(self, config: dict) -> list[fluxsite.FluxsiteTask]:
+        if not self._fluxsite_tasks:
+            self._fluxsite_tasks = fluxsite.get_fluxsite_tasks(
+                models=self._get_models(config),
+                science_configurations=config.get(
+                    "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS
+                ),
+                fluxsite_forcing_file_names=get_met_forcing_file_names(
+                    config.get("fluxsite", {}).get(
+                        "experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT
+                    )
+                ),
+            )
+        return self._fluxsite_tasks
+
+    def _get_spatial_tasks(self, config) -> list[spatial.SpatialTask]:
+        if not self._spatial_tasks:
+            self._spatial_tasks = spatial.get_spatial_tasks(
+                models=self._get_models(config),
+                met_forcings=config.get("spatial", {}).get(
+                    "met_forcings", internal.SPATIAL_DEFAULT_MET_FORCINGS
+                ),
+                science_configurations=config.get(
+                    "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS
+                ),
+                payu_args=config.get("spatial", {}).get("payu", {}).get("args"),
+            )
+        return self._spatial_tasks
 
     def validate_config(self, config_path: str, verbose: bool):
         """Endpoint for `benchcab validate_config`."""
@@ -180,6 +191,7 @@ def fluxsite_submit_job(
             "The NetCDF output for each task is written to "
             f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc"
         )
+        print("")
 
     def checkout(self, config_path: str, verbose: bool):
         """Endpoint for `benchcab checkout`."""
@@ -213,7 +225,7 @@ def checkout(self, config_path: str, verbose: bool):
 
         print("")
 
-    def build(self, config_path: str, verbose: bool):
+    def build(self, config_path: str, verbose: bool, mpi=False):
         """Endpoint for `benchcab build`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
@@ -226,11 +238,11 @@ def build(self, config_path: str, verbose: bool):
                 )
                 repo.custom_build(modules=config["modules"], verbose=verbose)
             else:
-                build_mode = "with MPI" if internal.MPI else "serially"
+                build_mode = "with MPI" if mpi else "serially"
                 print(f"Compiling CABLE {build_mode} for realisation {repo.name}...")
-                repo.pre_build(verbose=verbose)
-                repo.run_build(modules=config["modules"], verbose=verbose)
-                repo.post_build(verbose=verbose)
+                repo.pre_build(verbose=verbose, mpi=mpi)
+                repo.run_build(modules=config["modules"], verbose=verbose, mpi=mpi)
+                repo.post_build(verbose=verbose, mpi=mpi)
             print(f"Successfully compiled CABLE for realisation {repo.name}")
         print("")
 
@@ -239,11 +251,10 @@ def fluxsite_setup_work_directory(self, config_path: str, verbose: bool):
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
 
-        tasks = self.tasks if self.tasks else self._initialise_tasks(config)
         print("Setting up run directory tree for fluxsite tests...")
         setup_fluxsite_directory_tree(verbose=verbose)
         print("Setting up tasks...")
-        for task in tasks:
+        for task in self._get_fluxsite_tasks(config):
             task.setup_task(verbose=verbose)
         print("Successfully setup fluxsite tasks")
         print("")
@@ -252,8 +263,8 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool):
         """Endpoint for `benchcab fluxsite-run-tasks`."""
         config = self._get_config(config_path)
         self._validate_environment(project=config["project"], modules=config["modules"])
+        tasks = self._get_fluxsite_tasks(config)
 
-        tasks = self.tasks if self.tasks else self._initialise_tasks(config)
         print("Running fluxsite tasks...")
         try:
             multiprocess = config["fluxsite"]["multiprocess"]
@@ -263,9 +274,9 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool):
             ncpus = config.get("pbs", {}).get(
                 "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"]
             )
-            run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose)
+            fluxsite.run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose)
         else:
-            run_tasks(tasks, verbose=verbose)
+            fluxsite.run_tasks(tasks, verbose=verbose)
         print("Successfully ran fluxsite tasks")
         print("")
 
@@ -279,8 +290,9 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool):
                 "nccmp/1.8.5.0"
             )  # use `nccmp -df` for bitwise comparisons
 
-        tasks = self.tasks if self.tasks else self._initialise_tasks(config)
-        comparisons = get_fluxsite_comparisons(tasks)
+        comparisons = fluxsite.get_fluxsite_comparisons(
+            self._get_fluxsite_tasks(config)
+        )
 
         print("Running comparison tasks...")
         try:
@@ -311,10 +323,46 @@ def fluxsite(
         else:
             self.fluxsite_submit_job(config_path, verbose, skip)
 
-    def spatial(self, config_path: str, verbose: bool):
+    def spatial_setup_work_directory(self, config_path: str, verbose: bool):
+        """Endpoint for `benchcab spatial-setup-work-dir`."""
+        config = self._get_config(config_path)
+        self._validate_environment(project=config["project"], modules=config["modules"])
+
+        print("Setting up run directory tree for spatial tests...")
+        setup_spatial_directory_tree()
+        print("Setting up tasks...")
+        try:
+            payu_config = config["spatial"]["payu"]["config"]
+        except KeyError:
+            payu_config = None
+        for task in self._get_spatial_tasks(config):
+            task.setup_task(payu_config=payu_config, verbose=verbose)
+        print("Successfully setup spatial tasks")
+        print("")
+
+    def spatial_run_tasks(self, config_path: str, verbose: bool):
+        """Endpoint for `benchcab spatial-run-tasks`."""
+        config = self._get_config(config_path)
+        self._validate_environment(project=config["project"], modules=config["modules"])
+
+        print("Running spatial tasks...")
+        spatial.run_tasks(tasks=self._get_spatial_tasks(config), verbose=verbose)
+        print("Successfully dispatched payu jobs")
+        print("")
+
+    def spatial(self, config_path: str, verbose: bool, skip: list):
         """Endpoint for `benchcab spatial`."""
+        self.checkout(config_path, verbose)
+        self.build(config_path, verbose, mpi=True)
+        self.spatial_setup_work_directory(config_path, verbose)
+        self.spatial_run_tasks(config_path, verbose)
 
-    def run(self, config_path: str, no_submit: bool, verbose: bool, skip: list[str]):
+    def run(self, config_path: str, verbose: bool, skip: list[str]):
         """Endpoint for `benchcab run`."""
-        self.fluxsite(config_path, no_submit, verbose, skip)
-        self.spatial(config_path, verbose)
+        self.checkout(config_path, verbose)
+        self.build(config_path, verbose)
+        self.build(config_path, verbose, mpi=True)
+        self.fluxsite_setup_work_directory(config_path, verbose)
+        self.spatial_setup_work_directory(config_path, verbose)
+        self.fluxsite_submit_job(config_path, verbose, skip)
+        self.spatial_run_tasks(config_path, verbose)
diff --git a/benchcab/cli.py b/benchcab/cli.py
@@ -38,9 +38,9 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         action="store_true",
     )
 
-    # parent parser that contains arguments common to all run specific subcommands
-    args_run_subcommand = argparse.ArgumentParser(add_help=False)
-    args_run_subcommand.add_argument(
+    # parent parser that contains the argument for --no-submit
+    args_no_submit = argparse.ArgumentParser(add_help=False)
+    args_no_submit.add_argument(
         "--no-submit",
         action="store_true",
         help="Force benchcab to execute tasks on the current compute node.",
@@ -80,7 +80,6 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         parents=[
             args_help,
             args_subcommand,
-            args_run_subcommand,
             args_composite_subcommand,
         ],
         help="Run all test suites for CABLE.",
@@ -109,7 +108,7 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         parents=[
             args_help,
             args_subcommand,
-            args_run_subcommand,
+            args_no_submit,
             args_composite_subcommand,
         ],
         help="Run the fluxsite test suite for CABLE.",
@@ -140,6 +139,11 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         config file.""",
         add_help=False,
     )
+    parser_build.add_argument(
+        "--mpi",
+        action="store_true",
+        help="Enable MPI build.",
+    )
     parser_build.set_defaults(func=app.build)
 
     # subcommand: 'benchcab fluxsite-setup-work-dir'
@@ -168,9 +172,9 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         "fluxsite-run-tasks",
         parents=[args_help, args_subcommand],
         help="Run the fluxsite tasks of the main fluxsite command.",
-        description="""Runs the fluxsite tasks for the fluxsite test suite. Note, this command should
-        ideally be run inside a PBS job. This command is invoked by the PBS job script generated by
-        `benchcab run`.""",
+        description="""Runs the fluxsite tasks for the fluxsite test suite.
+        Note, this command should ideally be run inside a PBS job. This command
+        is invoked by the PBS job script generated by `benchcab run`.""",
         add_help=False,
     )
     parser_fluxsite_run_tasks.set_defaults(func=app.fluxsite_run_tasks)
@@ -192,11 +196,32 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
     # subcommand: 'benchcab spatial'
     parser_spatial = subparsers.add_parser(
         "spatial",
-        parents=[args_help, args_subcommand],
+        parents=[args_help, args_subcommand, args_composite_subcommand],
         help="Run the spatial tests only.",
         description="""Runs the default spatial test suite for CABLE.""",
         add_help=False,
     )
     parser_spatial.set_defaults(func=app.spatial)
 
+    # subcommand: 'benchcab spatial-setup-work-dir'
+    parser_spatial_setup_work_dir = subparsers.add_parser(
+        "spatial-setup-work-dir",
+        parents=[args_help, args_subcommand],
+        help="Run the work directory setup step of the spatial command.",
+        description="""Generates the spatial run directory tree in the current working
+        directory so that spatial tasks can be run.""",
+        add_help=False,
+    )
+    parser_spatial_setup_work_dir.set_defaults(func=app.spatial_setup_work_directory)
+
+    # subcommand: 'benchcab spatial-run-tasks'
+    parser_spatial_run_tasks = subparsers.add_parser(
+        "spatial-run-tasks",
+        parents=[args_help, args_subcommand],
+        help="Run the spatial tasks of the main spatial command.",
+        description="Runs the spatial tasks for the spatial test suite.",
+        add_help=False,
+    )
+    parser_spatial_run_tasks.set_defaults(func=app.spatial_run_tasks)
+
     return main_parser
diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml
@@ -95,4 +95,26 @@ fluxsite:
           schema:
             type: "string"
             required: false
-
+
+spatial:
+  type: "dict"
+  required: false
+  schema:
+    met_forcings:
+      type: "dict"
+      required: false
+      minlength: 1
+      keysrules:
+        type: "string"
+      valuesrules:
+        type: "string"
+    payu:
+      type: "dict"
+      required: false
+      schema:
+        config:
+          type: "dict"
+          required: false
+        args:
+          type: "string"
+          required: false