diff --git a/benchcab/bench_config.py b/benchcab/bench_config.py index a3cc4ce4..d902ee2a 100644 --- a/benchcab/bench_config.py +++ b/benchcab/bench_config.py @@ -14,8 +14,10 @@ def check_config(config: dict): if any(key not in config for key in internal.CONFIG_REQUIRED_KEYS): raise ValueError( - "The config file does not list all required entries. " - "Those are: " + ", ".join(internal.CONFIG_REQUIRED_KEYS) + "Keys are missing from the config file: " + + ", ".join( + key for key in internal.CONFIG_REQUIRED_KEYS if key not in config + ) ) if not isinstance(config["project"], str): @@ -104,17 +106,4 @@ def read_config(config_path: str) -> dict: check_config(config) - for branch in config["realisations"]: - # Add "name" key if not provided and set to base name of "path" key - branch.setdefault("name", Path(branch["path"]).name) - # Add "revision" key if not provided and set to default value -1, i.e. HEAD of branch - branch.setdefault("revision", -1) - # Add "patch" key if not provided and set to default value {} - branch.setdefault("patch", {}) - # Add "build_script" key if not provided and set to default value "" - branch.setdefault("build_script", "") - - # Add "science_configurations" if not provided and set to default value - config.setdefault("science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS) - return config diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index bf9a6330..401832f3 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -1,61 +1,158 @@ """Contains the main program entry point for `benchcab`.""" import sys - -from benchcab.job_script import submit_job +import os +import grp +from pathlib import Path +from typing import Optional +from subprocess import CalledProcessError + +from benchcab import internal +from benchcab.internal import get_met_sites from benchcab.bench_config import read_config from benchcab.benchtree import setup_fluxnet_directory_tree, setup_src_dir -from benchcab.build_cable import default_build, custom_build -from benchcab.get_cable import ( - checkout_cable, - checkout_cable_auxiliary, - svn_info_show_item, - next_path, -) -from benchcab.internal import ( - validate_environment, - get_met_sites, - CWD, - MULTIPROCESS, - SITE_LOG_DIR, - SITE_TASKS_DIR, - SITE_OUTPUT_DIR, -) +from benchcab.repository import CableRepository from benchcab.task import ( get_fluxnet_tasks, get_fluxnet_comparisons, run_tasks, run_tasks_in_parallel, - run_comparisons, - run_comparisons_in_parallel, Task, ) +from benchcab.comparison import run_comparisons, run_comparisons_in_parallel from benchcab.cli import generate_parser -from benchcab.environment_modules import module_load, module_is_loaded +from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface +from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface +from benchcab.utils.pbs import render_job_script +from benchcab.utils.logging import next_path class Benchcab: """A class that represents the `benchcab` application.""" - def __init__(self) -> None: - self.args = generate_parser().parse_args( - sys.argv[1:] if sys.argv[1:] else ["-h"] - ) - self.config = read_config(self.args.config) + root_dir: Path = internal.CWD + subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() + modules_handler: EnvironmentModulesInterface = EnvironmentModules() + + def __init__( + self, + argv: list[str], + config: Optional[dict] = None, + validate_env: bool = True, + ) -> None: + self.args = generate_parser().parse_args(argv[1:] if argv[1:] 
else ["-h"]) + self.config = config if config else read_config(self.args.config) + self.repos = [ + CableRepository(**config, repo_id=id) + for id, config in enumerate(self.config["realisations"]) + ] self.tasks: list[Task] = [] # initialise fluxnet tasks lazily - validate_environment( - project=self.config["project"], modules=self.config["modules"] + + if validate_env: + self._validate_environment( + project=self.config["project"], modules=self.config["modules"] + ) + + def _validate_environment(self, project: str, modules: list): + """Performs checks on current user environment""" + + if "gadi.nci" not in internal.NODENAME: + print("Error: benchcab is currently implemented only on Gadi") + sys.exit(1) + + namelist_dir = Path(internal.CWD / internal.NAMELIST_DIR) + if not namelist_dir.exists(): + print( + "Error: cannot find 'namelists' directory in current working directory" + ) + sys.exit(1) + + required_groups = [project, "ks32", "hh5"] + groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] + if not set(required_groups).issubset(groups): + print( + "Error: user does not have the required group permissions.", + "The required groups are:", + ", ".join(required_groups), + ) + sys.exit(1) + + for modname in modules: + if not self.modules_handler.module_is_avail(modname): + print(f"Error: module ({modname}) is not available.") + sys.exit(1) + + all_site_ids = set( + internal.MEORG_EXPERIMENTS["five-site-test"] + + internal.MEORG_EXPERIMENTS["forty-two-site-test"] ) + for site_id in all_site_ids: + paths = list(internal.MET_DIR.glob(f"{site_id}*")) + if not paths: + print( + f"Error: failed to infer met file for site id '{site_id}' in " + f"{internal.MET_DIR}." + ) + sys.exit(1) + if len(paths) > 1: + print( + f"Error: multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}." + ) + sys.exit(1) def _initialise_tasks(self) -> list[Task]: """A helper method that initialises and returns the `tasks` attribute.""" self.tasks = get_fluxnet_tasks( - realisations=self.config["realisations"], - science_configurations=self.config["science_configurations"], + repos=self.repos, + science_configurations=self.config.get( + "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS + ), met_sites=get_met_sites(self.config["experiment"]), ) return self.tasks + # TODO(Sean) this method should be the endpoint for the `fluxnet-submit-job` + # command line argument. 
+ def fluxnet_submit_job(self) -> None: + """Submits the PBS job script step in the fluxnet test workflow.""" + + job_script_path = self.root_dir / internal.QSUB_FNAME + print( + "Creating PBS job script to run FLUXNET tasks on compute " + f"nodes: {job_script_path.relative_to(self.root_dir)}" + ) + with job_script_path.open("w", encoding="utf-8") as file: + contents = render_job_script( + project=self.config["project"], + config_path=self.args.config, + modules=self.config["modules"], + storage_flags=[], # TODO(Sean) add storage flags option to config + verbose=self.args.verbose, + skip_bitwise_cmp="fluxnet-bitwise-cmp" in self.args.skip, + ) + file.write(contents) + + try: + proc = self.subprocess_handler.run_cmd( + f"qsub {job_script_path}", + capture_output=True, + verbose=self.args.verbose, + ) + except CalledProcessError as exc: + print("Error when submitting job to NCI queue") + print(exc.output) + raise + + print( + f"PBS job submitted: {proc.stdout.strip()}\n" + "The CABLE log file for each task is written to " + f"{internal.SITE_LOG_DIR}/<task_name>_log.txt\n" + "The CABLE standard output for each task is written to " + f"{internal.SITE_TASKS_DIR}/<task_name>/out.txt\n" + "The NetCDF output for each task is written to " + f"{internal.SITE_OUTPUT_DIR}/<task_name>_out.nc" + ) + def checkout(self): """Endpoint for `benchcab checkout`.""" @@ -63,18 +160,23 @@ def checkout(self): print("Checking out repositories...") rev_number_log = "" - for branch in self.config["realisations"]: - path_to_repo = checkout_cable(branch, verbose=self.args.verbose) + for repo in self.repos: + repo.checkout(verbose=self.args.verbose) rev_number_log += ( - f"{branch['name']} last changed revision: " - f"{svn_info_show_item(path_to_repo, 'last-changed-revision')}\n" + f"{repo.name} last changed revision: " + f"{repo.svn_info_show_item('last-changed-revision')}\n" ) # TODO(Sean) we should archive revision numbers for CABLE-AUX - checkout_cable_auxiliary(self.args.verbose) + cable_aux_repo = CableRepository(path=internal.CABLE_AUX_RELATIVE_SVN_PATH) + cable_aux_repo.checkout(verbose=self.args.verbose) - rev_number_log_path = CWD / next_path("rev_number-*.log") - print(f"Writing revision number info to {rev_number_log_path.relative_to(CWD)}") + rev_number_log_path = self.root_dir / next_path( + self.root_dir, "rev_number-*.log" + ) + print( + f"Writing revision number info to {rev_number_log_path.relative_to(self.root_dir)}" + ) with open(rev_number_log_path, "w", encoding="utf-8") as file: file.write(rev_number_log) @@ -82,18 +184,12 @@ def checkout(self): def build(self): """Endpoint for `benchcab build`.""" - for branch in self.config["realisations"]: - if branch["build_script"]: - custom_build( - branch["build_script"], branch["name"], verbose=self.args.verbose - ) + for repo in self.repos: + if repo.build_script: + repo.custom_build(verbose=self.args.verbose) else: - default_build( - branch["name"], - self.config["modules"], - verbose=self.args.verbose, - ) - print(f"Successfully compiled CABLE for realisation {branch['name']}") + repo.build(modules=self.config["modules"], verbose=self.args.verbose) + print(f"Successfully compiled CABLE for realisation {repo.name}") print("") def fluxnet_setup_work_directory(self): @@ -111,7 +207,7 @@ def fluxnet_run_tasks(self): """Endpoint for `benchcab fluxnet-run-tasks`.""" tasks = self.tasks if self.tasks else self._initialise_tasks() print("Running FLUXNET tasks...") - if MULTIPROCESS: + if internal.MULTIPROCESS: run_tasks_in_parallel(tasks, verbose=self.args.verbose) else: run_tasks(tasks, 
verbose=self.args.verbose) @@ -121,14 +217,16 @@ def fluxnet_run_tasks(self): def fluxnet_bitwise_cmp(self): """Endpoint for `benchcab fluxnet-bitwise-cmp`.""" - if not module_is_loaded("nccmp"): - module_load("nccmp") # use `nccmp -df` for bitwise comparisons + if not self.modules_handler.module_is_loaded("nccmp"): + self.modules_handler.module_load( + "nccmp" + ) # use `nccmp -df` for bitwise comparisons tasks = self.tasks if self.tasks else self._initialise_tasks() comparisons = get_fluxnet_comparisons(tasks) print("Running comparison tasks...") - if MULTIPROCESS: + if internal.MULTIPROCESS: run_comparisons_in_parallel(comparisons, verbose=self.args.verbose) else: run_comparisons(comparisons, verbose=self.args.verbose) @@ -144,25 +242,7 @@ def fluxnet(self): if "fluxnet-bitwise-cmp" not in self.args.skip: self.fluxnet_bitwise_cmp() else: - submit_job( - project=self.config["project"], - config_path=self.args.config, - modules=self.config["modules"], - verbose=self.args.verbose, - skip_bitwise_cmp="fluxnet-bitwise-cmp" in self.args.skip, - ) - print( - "The CABLE log file for each task is written to " - f"{SITE_LOG_DIR}/<task_name>_log.txt" - ) - print( - "The CABLE standard output for each task is written to " - f"{SITE_TASKS_DIR}/<task_name>/out.txt" - ) - print( - "The NetCDF output for each task is written to " - f"{SITE_OUTPUT_DIR}/<task_name>_out.nc" - ) + self.fluxnet_submit_job() def spatial(self): """Endpoint for `benchcab spatial`.""" @@ -206,7 +286,7 @@ def main(): This is required for setup.py entry_points """ - app = Benchcab() + app = Benchcab(argv=sys.argv) app.main() diff --git a/benchcab/benchtree.py b/benchcab/benchtree.py index 1dfaefa9..ef51cd07 100644 --- a/benchcab/benchtree.py +++ b/benchcab/benchtree.py @@ -1,4 +1,5 @@ -"""Contains functions for generating the directory structure used for `benchcab`.""" +"""A module containing functions for generating the directory structure used for `benchcab`.""" + from pathlib import Path import os import shutil @@ -7,77 +8,79 @@ from benchcab.task import Task -def clean_directory_tree(): +def clean_directory_tree(root_dir=internal.CWD): """Remove pre-existing directories in current working directory.""" - src_dir = Path(internal.CWD, internal.SRC_DIR) + src_dir = Path(root_dir, internal.SRC_DIR) if src_dir.exists(): shutil.rmtree(src_dir) - run_dir = Path(internal.CWD, internal.RUN_DIR) + run_dir = Path(root_dir, internal.RUN_DIR) if run_dir.exists(): shutil.rmtree(run_dir) -def setup_src_dir(): +def setup_src_dir(root_dir=internal.CWD): """Make `src` directory.""" - src_dir = Path(internal.CWD, internal.SRC_DIR) + src_dir = Path(root_dir, internal.SRC_DIR) if not src_dir.exists(): - print(f"Creating {src_dir.relative_to(internal.CWD)} directory: {src_dir}") + print(f"Creating {src_dir.relative_to(root_dir)} directory: {src_dir}") os.makedirs(src_dir) -def setup_fluxnet_directory_tree(fluxnet_tasks: list[Task], verbose=False): +def setup_fluxnet_directory_tree( + fluxnet_tasks: list[Task], root_dir=internal.CWD, verbose=False +): """Generate the directory structure used by `benchcab`.""" - run_dir = Path(internal.CWD, internal.RUN_DIR) + run_dir = Path(root_dir, internal.RUN_DIR) if not run_dir.exists(): os.makedirs(run_dir) - site_run_dir = Path(internal.CWD, internal.SITE_RUN_DIR) + site_run_dir = Path(root_dir, internal.SITE_RUN_DIR) if not site_run_dir.exists(): os.makedirs(site_run_dir) - site_log_dir = Path(internal.CWD, internal.SITE_LOG_DIR) + site_log_dir = Path(root_dir, internal.SITE_LOG_DIR) if not site_log_dir.exists(): print( - f"Creating 
{site_log_dir.relative_to(internal.CWD)} directory: {site_log_dir}" + f"Creating {site_log_dir.relative_to(root_dir)} directory: {site_log_dir}" ) os.makedirs(site_log_dir) - site_output_dir = Path(internal.CWD, internal.SITE_OUTPUT_DIR) + site_output_dir = Path(root_dir, internal.SITE_OUTPUT_DIR) if not site_output_dir.exists(): print( - f"Creating {site_output_dir.relative_to(internal.CWD)} directory: {site_output_dir}" + f"Creating {site_output_dir.relative_to(root_dir)} directory: {site_output_dir}" ) os.makedirs(site_output_dir) - site_tasks_dir = Path(internal.CWD, internal.SITE_TASKS_DIR) + site_tasks_dir = Path(root_dir, internal.SITE_TASKS_DIR) if not site_tasks_dir.exists(): print( - f"Creating {site_tasks_dir.relative_to(internal.CWD)} directory: {site_tasks_dir}" + f"Creating {site_tasks_dir.relative_to(root_dir)} directory: {site_tasks_dir}" ) os.makedirs(site_tasks_dir) - site_analysis_dir = Path(internal.CWD, internal.SITE_ANALYSIS_DIR) + site_analysis_dir = Path(root_dir, internal.SITE_ANALYSIS_DIR) if not site_analysis_dir.exists(): print( - f"Creating {site_analysis_dir.relative_to(internal.CWD)} directory: {site_analysis_dir}" + f"Creating {site_analysis_dir.relative_to(root_dir)} directory: {site_analysis_dir}" ) os.makedirs(site_analysis_dir) - site_bitwise_cmp_dir = Path(internal.CWD, internal.SITE_BITWISE_CMP_DIR) + site_bitwise_cmp_dir = Path(root_dir, internal.SITE_BITWISE_CMP_DIR) if not site_bitwise_cmp_dir.exists(): print( - f"Creating {site_bitwise_cmp_dir.relative_to(internal.CWD)} directory: " + f"Creating {site_bitwise_cmp_dir.relative_to(root_dir)} directory: " f"{site_bitwise_cmp_dir}" ) os.makedirs(site_bitwise_cmp_dir) print("Creating task directories...") for task in fluxnet_tasks: - task_dir = Path(internal.CWD, internal.SITE_TASKS_DIR, task.get_task_name()) + task_dir = Path(root_dir, internal.SITE_TASKS_DIR, task.get_task_name()) if not task_dir.exists(): if verbose: - print(f"Creating {task_dir.relative_to(internal.CWD)}: " f"{task_dir}") + print(f"Creating {task_dir.relative_to(root_dir)}: " f"{task_dir}") os.makedirs(task_dir) diff --git a/benchcab/build_cable.py b/benchcab/build_cable.py deleted file mode 100755 index f912d688..00000000 --- a/benchcab/build_cable.py +++ /dev/null @@ -1,95 +0,0 @@ -"""A module containing functions for building CABLE.""" - -import os -import contextlib -import stat -import shlex -import shutil -import pathlib - -from benchcab import internal -from benchcab.utils import subprocess -from benchcab import environment_modules - - -@contextlib.contextmanager -def chdir(newdir: pathlib.Path): - """Context manager `cd`.""" - prevdir = pathlib.Path.cwd() - os.chdir(newdir.expanduser()) - try: - yield - finally: - os.chdir(prevdir) - - -def remove_module_lines(file_path): - """Remove lines from `file_path` that call the environment modules package.""" - with open(file_path, "r", encoding="utf-8") as file: - contents = file.read() - with open(file_path, "w", encoding="utf-8") as file: - for line in contents.splitlines(True): - cmds = shlex.split(line, comments=True) - if "module" not in cmds: - file.write(line) - - -def default_build(branch_name: str, modules: list, verbose=False): - """Build CABLE using the default script. - - This loads the modules specified in the configuration file. - """ - print( - f"Compiling CABLE {'with MPI' if internal.MPI else 'serially'} for " - f"realisation {branch_name}..." 
- ) - - default_script_path = ( - internal.CWD / internal.SRC_DIR / branch_name / "offline" / "build3.sh" - ) - - if not default_script_path.is_file(): - raise FileNotFoundError( - f"The default build script, {default_script_path}, could not be found. " - "Do you need to specify a different build script with the " - "'build_script' option in config.yaml?", - ) - - tmp_script_path = default_script_path.parent / "tmp-build3.sh" - - if verbose: - print(f"Copying {default_script_path} to {tmp_script_path}") - shutil.copy(default_script_path, tmp_script_path) - - if verbose: - print(f"chmod +x {tmp_script_path}") - tmp_script_path.chmod(tmp_script_path.stat().st_mode | stat.S_IEXEC) - - if verbose: - print( - f"Modifying {tmp_script_path.name}: remove lines that call " - "environment modules" - ) - remove_module_lines(tmp_script_path) - - with chdir(default_script_path.parent), environment_modules.load( - modules, verbose=verbose - ): - subprocess.run_cmd( - f"./{tmp_script_path.name}" + (" mpi" if internal.MPI else ""), - verbose=verbose, - ) - - -def custom_build(config_build_script: str, branch_name: str, verbose=False): - """Build CABLE with a script provided in configuration file""" - print( - "Compiling CABLE using custom build script for " f"realisation {branch_name}..." - ) - - build_script_path = ( - internal.CWD / internal.SRC_DIR / branch_name / config_build_script - ) - - with chdir(build_script_path.parent): - subprocess.run_cmd(f"./{build_script_path.name}", verbose=verbose) diff --git a/benchcab/comparison.py b/benchcab/comparison.py new file mode 100644 index 00000000..a536c03a --- /dev/null +++ b/benchcab/comparison.py @@ -0,0 +1,87 @@ +"""A module containing functions and data structures for running comparison tasks.""" + +import multiprocessing +import queue +from pathlib import Path +from subprocess import CalledProcessError + + +from benchcab import internal +from benchcab.utils.subprocess import SubprocessWrapperInterface, SubprocessWrapper + + +class ComparisonTask: + """A class used to represent a single bitwise comparison task.""" + + root_dir: Path = internal.CWD + subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() + + def __init__( + self, + files: tuple[Path, Path], + task_name: str, + ) -> None: + self.files = files + self.task_name = task_name + + def run(self, verbose=False) -> None: + """Executes `nccmp -df` on the NetCDF files pointed to by `self.files`.""" + + file_a, file_b = self.files + if verbose: + print(f"Comparing files {file_a.name} and {file_b.name} bitwise...") + + try: + self.subprocess_handler.run_cmd( + f"nccmp -df {file_a} {file_b}", + capture_output=True, + verbose=verbose, + ) + print(f"Success: files {file_a.name} {file_b.name} are identical") + except CalledProcessError as exc: + output_file = ( + self.root_dir / internal.SITE_BITWISE_CMP_DIR / f"{self.task_name}.txt" + ) + with open(output_file, "w", encoding="utf-8") as file: + file.write(exc.stdout) + print( + f"Failure: files {file_a.name} {file_b.name} differ. 
" + f"Results of diff have been written to {output_file}" + ) + + +def run_comparisons(comparison_tasks: list[ComparisonTask], verbose=False) -> None: + """Runs bitwise comparison tasks serially.""" + for task in comparison_tasks: + task.run(verbose=verbose) + + +def run_comparisons_in_parallel( + comparison_tasks: list[ComparisonTask], verbose=False +) -> None: + """Runs bitwise comparison tasks in parallel across multiple processes.""" + + task_queue: multiprocessing.Queue = multiprocessing.Queue() + for task in comparison_tasks: + task_queue.put(task) + + processes = [] + for _ in range(internal.NCPUS): + proc = multiprocessing.Process( + target=worker_comparison, args=[task_queue, verbose] + ) + proc.start() + processes.append(proc) + + for proc in processes: + proc.join() + + +def worker_comparison(task_queue: multiprocessing.Queue, verbose=False) -> None: + """Runs bitwise comparison tasks in `task_queue` until the queue is emptied.""" + while True: + try: + task = task_queue.get_nowait() + except queue.Empty: + return + task.run(verbose=verbose) diff --git a/benchcab/environment_modules.py b/benchcab/environment_modules.py index d5cb9700..08da7cc4 100644 --- a/benchcab/environment_modules.py +++ b/benchcab/environment_modules.py @@ -2,6 +2,7 @@ import sys import contextlib +from abc import ABC as AbstractBaseClass, abstractmethod sys.path.append("/opt/Modules/v4.3.0/init") try: @@ -19,37 +20,57 @@ class EnvironmentModulesError(Exception): """Custom exception class for environment modules errors.""" -@contextlib.contextmanager -def load(modules, verbose=False): - """Context manager for loading and unloading modules.""" - if verbose: - print("Loading modules: " + " ".join(modules)) - module_load(*modules) - try: - yield - finally: - if verbose: - print("Unloading modules: " + " ".join(modules)) - module_unload(*modules) +class EnvironmentModulesInterface(AbstractBaseClass): + """An abstract class (interface) that defines abstract methods for interacting + with the environment modules API. + + An interface is defined so that we can easily mock the environment modules API + in our unit tests. 
+ """ + + @abstractmethod + def module_is_avail(self, *args: str) -> bool: + """Wrapper around `module is-avail modulefile...`""" + @abstractmethod + def module_is_loaded(self, *args: str) -> bool: + """Wrapper around `module is-loaded modulefile...`""" -def module_is_avail(*args: str): - """Wrapper around `module is-avail modulefile...`""" - return module("is-avail", *args) + @abstractmethod + def module_load(self, *args: str) -> None: + """Wrapper around `module load modulefile...`""" + + @abstractmethod + def module_unload(self, *args: str) -> None: + """Wrapper around `module unload modulefile...`""" + + @contextlib.contextmanager + def load(self, modules: list[str], verbose=False): + """Context manager for loading and unloading modules.""" + if verbose: + print("Loading modules: " + " ".join(modules)) + self.module_load(*modules) + try: + yield + finally: + if verbose: + print("Unloading modules: " + " ".join(modules)) + self.module_unload(*modules) -def module_is_loaded(*args: str): - """Wrapper around `module is-loaded modulefile...`""" - return module("is-loaded", *args) +class EnvironmentModules(EnvironmentModulesInterface): + """A concrete implementation of the `EnvironmentModulesInterface` abstract class.""" + def module_is_avail(self, *args: str) -> bool: + return module("is-avail", *args) -def module_load(*args: str): - """Wrapper around `module load modulefile...`""" - if not module("load", *args): - raise EnvironmentModulesError("Failed to load modules: " + " ".join(args)) + def module_is_loaded(self, *args: str) -> bool: + return module("is-loaded", *args) + def module_load(self, *args: str) -> None: + if not module("load", *args): + raise EnvironmentModulesError("Failed to load modules: " + " ".join(args)) -def module_unload(*args: str): - """Wrapper around `module unload modulefile...`""" - if not module("unload", *args): - raise EnvironmentModulesError("Failed to unload modules: " + " ".join(args)) + def module_unload(self, *args: str) -> None: + if not module("unload", *args): + raise EnvironmentModulesError("Failed to unload modules: " + " ".join(args)) diff --git a/benchcab/get_cable.py b/benchcab/get_cable.py deleted file mode 100755 index 790b09ee..00000000 --- a/benchcab/get_cable.py +++ /dev/null @@ -1,93 +0,0 @@ -"""A module containing functions for checking out CABLE repositories.""" - -from typing import Union -from pathlib import Path - -from benchcab import internal -from benchcab.utils import subprocess - - -def next_path(path_pattern, sep="-"): - """Finds the next free path in a sequentially named list of - files with the following pattern: - - path_pattern = 'file{sep}*.suf': - - file-1.txt - file-2.txt - file-3.txt - """ - - loc_pattern = Path(path_pattern) - new_file_index = 1 - common_filename, _ = loc_pattern.stem.split(sep) - - pattern_files_sorted = sorted(internal.CWD.glob(path_pattern)) - if pattern_files_sorted != []: - common_filename, last_file_index = pattern_files_sorted[-1].stem.split(sep) - new_file_index = int(last_file_index) + 1 - - return f"{common_filename}{sep}{new_file_index}{loc_pattern.suffix}" - - -def svn_info_show_item(path: Union[Path, str], item: str) -> str: - """A wrapper around `svn info --show-item `.""" - proc = subprocess.run_cmd( - f"svn info --show-item {item} {path}", capture_output=True - ) - return proc.stdout.strip() - - -def checkout_cable_auxiliary(verbose=False) -> Path: - """Checkout CABLE-AUX.""" - - cable_aux_dir = Path(internal.CWD / internal.CABLE_AUX_DIR) - - subprocess.run_cmd( - f"svn checkout 
{internal.CABLE_SVN_ROOT}/branches/Share/CABLE-AUX {cable_aux_dir}", - verbose=verbose, - ) - - revision = svn_info_show_item(cable_aux_dir, "revision") - print(f"Successfully checked out CABLE-AUX at revision {revision}") - - # Check relevant files exist in repository: - - if not Path.exists(internal.CWD / internal.GRID_FILE): - raise RuntimeError( - f"Error checking out CABLE-AUX: cannot find file '{internal.GRID_FILE}'" - ) - - if not Path.exists(internal.CWD / internal.PHEN_FILE): - raise RuntimeError( - f"Error checking out CABLE-AUX: cannot find file '{internal.PHEN_FILE}'" - ) - - if not Path.exists(internal.CWD / internal.CNPBIOME_FILE): - raise RuntimeError( - f"Error checking out CABLE-AUX: cannot find file '{internal.CNPBIOME_FILE}'" - ) - - return cable_aux_dir - - -def checkout_cable(branch_config: dict, verbose=False) -> Path: - """Checkout a branch of CABLE.""" - # TODO(Sean) do nothing if the repository has already been checked out? - # This also relates the 'clean' feature. - - cmd = "svn checkout" - - # Check if a specified revision is required. Negative value means take the latest - if branch_config["revision"] > 0: - cmd += f" -r {branch_config['revision']}" - - path_to_repo = Path(internal.CWD, internal.SRC_DIR, branch_config["name"]) - cmd += f" {internal.CABLE_SVN_ROOT}/{branch_config['path']} {path_to_repo}" - - subprocess.run_cmd(cmd, verbose=verbose) - - revision = svn_info_show_item(path_to_repo, "revision") - print(f"Successfully checked out {branch_config['name']} at revision {revision}") - - return path_to_repo diff --git a/benchcab/internal.py b/benchcab/internal.py index 83e63c67..83fdad71 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -1,11 +1,8 @@ """internal.py: define all runtime constants in a single file.""" import os -import sys -import grp from pathlib import Path -from .environment_modules import module_is_avail _, NODENAME, _, _, _ = os.uname() @@ -39,6 +36,13 @@ # Relative path to CABLE Auxiliary repository CABLE_AUX_DIR = SRC_DIR / "CABLE-AUX" +# Relative URL path to CABLE Auxiliary repository on SVN +CABLE_AUX_RELATIVE_SVN_PATH = "branches/Share/CABLE-AUX" + +# TODO(Sean): hard coding paths to assets in CABLE_AUX is brittle, these should +# be promoted to config parameters, especially since we no longer throw exceptions +# when the assets cannot be found. 
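[Reviewer note — illustrative, not part of the patch] It may help to spell out how this constant flows through the new `CableRepository` class added below in benchcab/repository.py: `name` defaults to `Path(path).name`, so the auxiliary checkout lands in the `src/CABLE-AUX` directory without special-casing. A sketch of the expected behaviour:

    repo = CableRepository(path="branches/Share/CABLE-AUX")
    assert repo.name == "CABLE-AUX"
    # repo.checkout() then issues, roughly:
    #   svn checkout {CABLE_SVN_ROOT}/branches/Share/CABLE-AUX {root_dir}/src/CABLE-AUX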
+ # Relative path to CABLE grid info file GRID_FILE = CABLE_AUX_DIR / "offline" / "gridinfo_CSIRO_1x1.nc" @@ -213,47 +217,3 @@ def get_met_sites(experiment: str) -> list[str]: ] return met_sites - - -def validate_environment(project: str, modules: list): - """Performs checks on current user environment""" - - if "gadi.nci" not in NODENAME: - print("Error: benchcab is currently implemented only on Gadi") - sys.exit(1) - - namelist_dir = Path(CWD / NAMELIST_DIR) - if not namelist_dir.exists(): - print("Error: cannot find 'namelists' directory in current working directory") - sys.exit(1) - - required_groups = [project, "ks32", "hh5"] - groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] - if not set(required_groups).issubset(groups): - print( - "Error: user does not have the required group permissions.", - "The required groups are:", - ", ".join(required_groups), - ) - sys.exit(1) - - for modname in modules: - if not module_is_avail(modname): - print(f"Error: module ({modname}) is not available.") - sys.exit(1) - - all_site_ids = set( - MEORG_EXPERIMENTS["five-site-test"] + MEORG_EXPERIMENTS["forty-two-site-test"] - ) - for site_id in all_site_ids: - paths = list(MET_DIR.glob(f"{site_id}*")) - if not paths: - print( - f"Error: failed to infer met file for site id '{site_id}' in {MET_DIR}." - ) - sys.exit(1) - if len(paths) > 1: - print( - f"Error: multiple paths infered for site id: '{site_id}' in {MET_DIR}." - ) - sys.exit(1) diff --git a/benchcab/job_script.py b/benchcab/job_script.py deleted file mode 100644 index 3a539c85..00000000 --- a/benchcab/job_script.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Contains functions for job script creation and submission on Gadi.""" - -from subprocess import CalledProcessError -from pathlib import Path - -from benchcab import internal -from benchcab.utils import subprocess - - -def get_local_storage_flag(path: Path) -> str: - """Returns the PBS storage flag for a path on the Gadi file system.""" - if str(path).startswith("/scratch"): - return f"scratch/{path.parts[2]}" - if str(path).startswith("/g/data"): - return f"gdata/{path.parts[3]}" - raise RuntimeError("Current directory structure unknown on Gadi.") - - -def submit_job( - project: str, - config_path: str, - modules: list, - verbose=False, - skip_bitwise_cmp=False, -): - """Submits a PBS job that executes all computationally expensive commands. - - This includes things such as running CABLE and running bitwise comparison jobs - between model output files. - The PBS job script is written to the current working directory as a side effect. - """ - - job_script_path = internal.CWD / internal.QSUB_FNAME - module_load_lines = "\n".join( - f"module load {module_name}" for module_name in modules - ) - verbose_flag = "-v" if verbose else "" - - print( - "Creating PBS job script to run FLUXNET tasks on compute " - f"nodes: {job_script_path.relative_to(internal.CWD)}" - ) - with open(job_script_path, "w", encoding="utf-8") as file: - file.write( - f"""#!/bin/bash -#PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} -#PBS -q normal -#PBS -P {project} -#PBS -j oe -#PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/{project}+{get_local_storage_flag(internal.CWD)} - -module purge -module use /g/data/hh5/public/modules -module load conda/analysis3-unstable -{module_load_lines} - -benchcab fluxnet-run-tasks --config={config_path} {verbose_flag} -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' 
- exit 1 -fi -{'' if skip_bitwise_cmp else f''' -benchcab fluxnet-bitwise-cmp --config={config_path} {verbose_flag} -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-bitwise-cmp failed. Exiting...' - exit 1 -fi''' } -""" - ) - - try: - proc = subprocess.run_cmd( - f"qsub {job_script_path}", capture_output=True, verbose=verbose - ) - print(f"PBS job submitted: {proc.stdout.strip()}") - except CalledProcessError as exc: - print("Error when submitting job to NCI queue") - print(exc.stderr) - raise diff --git a/benchcab/repository.py b/benchcab/repository.py new file mode 100644 index 00000000..700adddd --- /dev/null +++ b/benchcab/repository.py @@ -0,0 +1,165 @@ +"""A module containing functions and data structures for manipulating CABLE repositories.""" + +import shlex +import contextlib +import os +import shutil +import stat +from pathlib import Path +from typing import Optional + +from benchcab import internal +from benchcab.environment_modules import EnvironmentModulesInterface, EnvironmentModules +from benchcab.utils.subprocess import SubprocessWrapperInterface, SubprocessWrapper + + +@contextlib.contextmanager +def chdir(newdir: Path): + """Context manager `cd`.""" + prevdir = Path.cwd() + os.chdir(newdir.expanduser()) + try: + yield + finally: + os.chdir(prevdir) + + +class CableRepository: + """A class used to represent a CABLE repository.""" + + root_dir: Path = internal.CWD + subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() + modules_handler: EnvironmentModulesInterface = EnvironmentModules() + + def __init__( + self, + path: str, + name: Optional[str] = None, + revision: Optional[int] = None, + patch: Optional[dict] = None, + build_script: Optional[str] = None, + repo_id: Optional[int] = None, + ) -> None: + self.path = Path(path) + self.name = name if name else self.path.name + self.revision = revision + self.patch = patch + self.build_script = build_script + self._repo_id = repo_id + + @property + def repo_id(self) -> int: + """Get or set the repo ID.""" + if self._repo_id is None: + raise RuntimeError("Attempting to access undefined repo ID") + return self._repo_id + + @repo_id.setter + def repo_id(self, value: int): + self._repo_id = value + + def checkout(self, verbose=False) -> None: + """Checkout a branch of CABLE.""" + # TODO(Sean) do nothing if the repository has already been checked out? + # This also relates to the 'clean' feature. + + cmd = "svn checkout" + + if self.revision: + cmd += f" -r {self.revision}" + + path_to_repo = self.root_dir / internal.SRC_DIR / self.name + cmd += f" {internal.CABLE_SVN_ROOT}/{self.path} {path_to_repo}" + + self.subprocess_handler.run_cmd(cmd, verbose=verbose) + + revision = self.svn_info_show_item("revision") + print(f"Successfully checked out {self.name} at revision {revision}") + + def svn_info_show_item(self, item: str) -> str: + """A wrapper around `svn info --show-item <item> <path>`.""" + path_to_repo = self.root_dir / internal.SRC_DIR / self.name + proc = self.subprocess_handler.run_cmd( + f"svn info --show-item {item} {path_to_repo}", capture_output=True + ) + return proc.stdout.strip() + + # TODO(Sean) the modules argument should be in the constructor and + # `custom_build()` should be a part of `build()`. This is part of + # issue #94. + def build(self, modules: list[str], verbose=False) -> None: + """Build CABLE using the default script.""" + + print( + f"Compiling CABLE {'with MPI' if internal.MPI else 'serially'} for " + f"realisation {self.name}..." 
+ ) + + default_script_path = ( + self.root_dir / internal.SRC_DIR / self.name / "offline" / "build3.sh" + ) + + if not default_script_path.is_file(): + raise FileNotFoundError( + f"The default build script, {default_script_path}, could not be found. " + "Do you need to specify a different build script with the " + "'build_script' option in config.yaml?", + ) + + tmp_script_path = default_script_path.parent / "tmp-build3.sh" + + if verbose: + print(f"Copying {default_script_path} to {tmp_script_path}") + shutil.copy(default_script_path, tmp_script_path) + + if verbose: + print(f"chmod +x {tmp_script_path}") + tmp_script_path.chmod(tmp_script_path.stat().st_mode | stat.S_IEXEC) + + if verbose: + print( + f"Modifying {tmp_script_path.name}: remove lines that call " + "environment modules" + ) + remove_module_lines(tmp_script_path) + + with chdir(default_script_path.parent), self.modules_handler.load( + modules, verbose=verbose + ): + self.subprocess_handler.run_cmd( + f"./{tmp_script_path.name}" + (" mpi" if internal.MPI else ""), + verbose=verbose, + ) + + def custom_build(self, verbose=False) -> None: + """Build CABLE with a script provided in configuration file""" + + if self.build_script is None: + # TODO(Sean) it is bad that we are allowing this to fail silently + # but this will be fixed once we have a single build function. + return + + print( + "Compiling CABLE using custom build script for " + f"realisation {self.name}..." + ) + + build_script_path = ( + self.root_dir / internal.SRC_DIR / self.name / self.build_script + ) + + with chdir(build_script_path.parent): + self.subprocess_handler.run_cmd( + f"./{build_script_path.name}", verbose=verbose + ) + + +def remove_module_lines(file_path: Path) -> None: + """Remove lines from `file_path` that call the environment modules package.""" + with file_path.open("r", encoding="utf-8") as file: + contents = file.read() + with file_path.open("w", encoding="utf-8") as file: + for line in contents.splitlines(True): + cmds = shlex.split(line, comments=True) + if "module" not in cmds: + file.write(line) diff --git a/benchcab/task.py b/benchcab/task.py index ddb9e107..9cad8cf9 100644 --- a/benchcab/task.py +++ b/benchcab/task.py @@ -1,11 +1,9 @@ """A module containing functions and data structures for running fluxnet tasks.""" -import os import shutil import multiprocessing import queue -import dataclasses from pathlib import Path from typing import TypeVar, Dict, Any from subprocess import CalledProcessError @@ -15,8 +13,9 @@ import f90nml from benchcab import internal -from benchcab.utils import subprocess -import benchcab.get_cable +from benchcab.repository import CableRepository +from benchcab.comparison import ComparisonTask +from benchcab.utils.subprocess import SubprocessWrapperInterface, SubprocessWrapper # fmt: off @@ -66,21 +65,28 @@ class CableError(Exception): """Custom exception class for CABLE errors.""" -@dataclasses.dataclass class Task: """A class used to represent a single fluxnet task.""" - branch_id: int - branch_name: str - branch_patch: dict - met_forcing_file: str - sci_conf_id: int - sci_config: dict + root_dir: Path = internal.CWD + subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() + + def __init__( + self, + repo: CableRepository, + met_forcing_file: str, + sci_conf_id: int, + sci_config: dict, + ) -> None: + self.repo = repo + self.met_forcing_file = met_forcing_file + self.sci_conf_id = sci_conf_id + self.sci_config = sci_config def get_task_name(self) -> str: """Returns the file name convention used 
for this task.""" met_forcing_base_filename = self.met_forcing_file.split(".")[0] - return f"{met_forcing_base_filename}_R{self.branch_id}_S{self.sci_conf_id}" + return f"{met_forcing_base_filename}_R{self.repo.repo_id}_S{self.sci_conf_id}" def get_output_filename(self) -> str: """Returns the file name convention used for the netcdf output file.""" @@ -109,7 +115,7 @@ def setup_task(self, verbose=False): self.fetch_files(verbose=verbose) nml_path = ( - internal.CWD + self.root_dir / internal.SITE_TASKS_DIR / self.get_task_name() / internal.CABLE_NML @@ -124,25 +130,25 @@ def setup_task(self, verbose=False): "filename": { "met": str(internal.MET_DIR / self.met_forcing_file), "out": str( - internal.CWD + self.root_dir / internal.SITE_OUTPUT_DIR / self.get_output_filename() ), "log": str( - internal.CWD + self.root_dir / internal.SITE_LOG_DIR / self.get_log_filename() ), "restart_out": " ", - "type": str(internal.CWD / internal.GRID_FILE), + "type": str(self.root_dir / internal.GRID_FILE), }, "output": { "restart": False, }, "fixedCO2": internal.CABLE_FIXED_CO2_CONC, "casafile": { - "phen": str(internal.CWD / internal.PHEN_FILE), - "cnpbiome": str(internal.CWD / internal.CNPBIOME_FILE), + "phen": str(self.root_dir / internal.PHEN_FILE), + "cnpbiome": str(self.root_dir / internal.CNPBIOME_FILE), }, "spinup": False, } @@ -153,12 +159,12 @@ def setup_task(self, verbose=False): print(f" Adding science configurations to CABLE namelist file {nml_path}") patch_namelist(nml_path, self.sci_config) - if self.branch_patch: + if self.repo.patch: if verbose: print( f" Adding branch specific configurations to CABLE namelist file {nml_path}" ) - patch_namelist(nml_path, self.branch_patch) + patch_namelist(nml_path, self.repo.patch) def clean_task(self, verbose=False): """Cleans output files, namelist files, log files and cable executables if they exist.""" @@ -166,28 +172,33 @@ def clean_task(self, verbose=False): if verbose: print(" Cleaning task") - task_name = self.get_task_name() - task_dir = Path(internal.CWD, internal.SITE_TASKS_DIR, task_name) + task_dir = self.root_dir / internal.SITE_TASKS_DIR / self.get_task_name() - if Path.exists(task_dir / internal.CABLE_EXE): - os.remove(task_dir / internal.CABLE_EXE) + cable_exe = task_dir / internal.CABLE_EXE + if cable_exe.exists(): + cable_exe.unlink() - if Path.exists(task_dir / internal.CABLE_NML): - os.remove(task_dir / internal.CABLE_NML) + cable_nml = task_dir / internal.CABLE_NML + if cable_nml.exists(): + cable_nml.unlink() - if Path.exists(task_dir / internal.CABLE_VEGETATION_NML): - os.remove(task_dir / internal.CABLE_VEGETATION_NML) + cable_vegetation_nml = task_dir / internal.CABLE_VEGETATION_NML + if cable_vegetation_nml.exists(): + cable_vegetation_nml.unlink() - if Path.exists(task_dir / internal.CABLE_SOIL_NML): - os.remove(task_dir / internal.CABLE_SOIL_NML) + cable_soil_nml = task_dir / internal.CABLE_SOIL_NML + if cable_soil_nml.exists(): + cable_soil_nml.unlink() - output_file = self.get_output_filename() - if Path.exists(internal.CWD / internal.SITE_OUTPUT_DIR / output_file): - os.remove(internal.CWD / internal.SITE_OUTPUT_DIR / output_file) + output_file = ( + self.root_dir / internal.SITE_OUTPUT_DIR / self.get_output_filename() + ) + if output_file.exists(): + output_file.unlink() - log_file = self.get_log_filename() - if Path.exists(internal.CWD / internal.SITE_LOG_DIR / log_file): - os.remove(internal.CWD / internal.SITE_LOG_DIR / log_file) + log_file = self.root_dir / internal.SITE_LOG_DIR / self.get_log_filename() + if 
log_file.exists(): + log_file.unlink() return self @@ -199,22 +210,22 @@ def fetch_files(self, verbose=False): - copies cable executable from source to 'runs/site/tasks/' directory. """ - task_dir = Path(internal.CWD, internal.SITE_TASKS_DIR, self.get_task_name()) + task_dir = self.root_dir / internal.SITE_TASKS_DIR / self.get_task_name() if verbose: print( - f" Copying namelist files from {internal.CWD / internal.NAMELIST_DIR} " + f" Copying namelist files from {self.root_dir / internal.NAMELIST_DIR} " f"to {task_dir}" ) shutil.copytree( - internal.CWD / internal.NAMELIST_DIR, task_dir, dirs_exist_ok=True + self.root_dir / internal.NAMELIST_DIR, task_dir, dirs_exist_ok=True ) exe_src = ( - internal.CWD + self.root_dir / internal.SRC_DIR - / self.branch_name + / self.repo.name / "offline" / internal.CABLE_EXE ) @@ -230,7 +241,7 @@ def fetch_files(self, verbose=False): def run(self, verbose=False): """Runs a single fluxnet task.""" task_name = self.get_task_name() - task_dir = internal.CWD / internal.SITE_TASKS_DIR / task_name + task_dir = self.root_dir / internal.SITE_TASKS_DIR / task_name if verbose: print( f"Running task {task_name}... CABLE standard output " @@ -248,13 +259,13 @@ def run_cable(self, verbose=False): Raises `CableError` when CABLE returns a non-zero exit code. """ task_name = self.get_task_name() - task_dir = internal.CWD / internal.SITE_TASKS_DIR / task_name + task_dir = self.root_dir / internal.SITE_TASKS_DIR / task_name exe_path = task_dir / internal.CABLE_EXE nml_path = task_dir / internal.CABLE_NML stdout_path = task_dir / internal.CABLE_STDOUT_FILENAME try: - subprocess.run_cmd( + self.subprocess_handler.run_cmd( f"{exe_path} {nml_path}", output_file=stdout_path, verbose=verbose ) except CalledProcessError as exc: @@ -268,10 +279,10 @@ def add_provenance_info(self, verbose=False): the namelist file used to run cable. 
""" nc_output_path = ( - internal.CWD / internal.SITE_OUTPUT_DIR / self.get_output_filename() + self.root_dir / internal.SITE_OUTPUT_DIR / self.get_output_filename() ) nml = f90nml.read( - internal.CWD + self.root_dir / internal.SITE_TASKS_DIR / self.get_task_name() / internal.CABLE_NML @@ -288,33 +299,27 @@ def add_provenance_info(self, verbose=False): ).items() }, **{ - "cable_branch": benchcab.get_cable.svn_info_show_item( - internal.CWD / internal.SRC_DIR / self.branch_name, "url" - ), - "svn_revision_number": benchcab.get_cable.svn_info_show_item( - internal.CWD / internal.SRC_DIR / self.branch_name, - "revision", - ), + "cable_branch": self.repo.svn_info_show_item("url"), + "svn_revision_number": self.repo.svn_info_show_item("revision"), }, } ) def get_fluxnet_tasks( - realisations: list[dict], science_configurations: list[dict], met_sites: list[str] + repos: list[CableRepository], + science_configurations: list[dict], + met_sites: list[str], ) -> list[Task]: """Returns a list of fluxnet tasks to run.""" - # TODO(Sean) convert this to a generator tasks = [ Task( - branch_id=branch_id, - branch_name=branch["name"], - branch_patch=branch["patch"], + repo=repo, met_forcing_file=site, sci_conf_id=sci_conf_id, sci_config=sci_config, ) - for branch_id, branch in enumerate(realisations) + for repo in repos for site in met_sites for sci_conf_id, sci_config in enumerate(science_configurations) ] @@ -354,20 +359,31 @@ def worker_run(task_queue: multiprocessing.Queue, verbose=False): task.run(verbose=verbose) -def get_fluxnet_comparisons(tasks: list[Task]) -> list[tuple[Task, Task]]: +def get_fluxnet_comparisons( + tasks: list[Task], root_dir=internal.CWD +) -> list[ComparisonTask]: """Returns a list of pairs of fluxnet tasks to run comparisons with. Pairs should be matching in science configurations and meteorological forcing, but differ in realisations. When multiple realisations are specified, return all pair wise combinations between all realisations. """ + output_dir = root_dir / internal.SITE_OUTPUT_DIR return [ - (task_a, task_b) + ComparisonTask( + files=( + output_dir / task_a.get_output_filename(), + output_dir / task_b.get_output_filename(), + ), + task_name=get_comparison_name( + task_a.repo, task_b.repo, task_a.met_forcing_file, task_a.sci_conf_id + ), + ) for task_a in tasks for task_b in tasks if task_a.met_forcing_file == task_b.met_forcing_file and task_a.sci_conf_id == task_b.sci_conf_id - and task_a.branch_id < task_b.branch_id + and task_a.repo.repo_id < task_b.repo.repo_id # TODO(Sean): Review later - the following code avoids using a double # for loop to generate pair wise combinations, however we would have # to re-initialize task instances to get access to the output file path @@ -381,82 +397,15 @@ def get_fluxnet_comparisons(tasks: list[Task]) -> list[tuple[Task, Task]]: ] -def get_comparison_name(task_a: Task, task_b: Task) -> str: - """Returns the naming convention used for bitwise comparisons. - - Assumes `met_forcing_file` and `sci_conf_id` attributes are - common to both tasks. 
- """ - met_forcing_base_filename = task_a.met_forcing_file.split(".")[0] +def get_comparison_name( + repo_a: CableRepository, + repo_b: CableRepository, + met_forcing_file: str, + sci_conf_id: int, +) -> str: + """Returns the naming convention used for bitwise comparisons.""" + met_forcing_base_filename = met_forcing_file.split(".")[0] return ( - f"{met_forcing_base_filename}_S{task_a.sci_conf_id}" - f"_R{task_a.branch_id}_R{task_b.branch_id}" - ) - - -def run_comparisons(comparisons: list[tuple[Task, Task]], verbose=False): - """Runs bitwise comparison tasks serially.""" - for task_a, task_b in comparisons: - run_comparison(task_a, task_b, verbose=verbose) - - -def run_comparisons_in_parallel(comparisons: list[tuple[Task, Task]], verbose=False): - """Runs bitwise comparison tasks in parallel across multiple processes.""" - - task_queue: multiprocessing.Queue = multiprocessing.Queue() - for pair in comparisons: - task_queue.put(pair) - - processes = [] - for _ in range(internal.NCPUS): - proc = multiprocessing.Process( - target=worker_comparison, args=[task_queue, verbose] - ) - proc.start() - processes.append(proc) - - for proc in processes: - proc.join() - - -def worker_comparison(task_queue: multiprocessing.Queue, verbose=False): - """Runs bitwise comparison tasks in `task_queue` until the queue is emptied.""" - while True: - try: - task_a, task_b = task_queue.get_nowait() - except queue.Empty: - return - run_comparison(task_a, task_b, verbose=verbose) - - -def run_comparison(task_a: Task, task_b: Task, verbose=False): - """Executes `nccmp -df` between the NetCDF output file of `task_a` and of `task_b`.""" - task_a_output = ( - internal.CWD / internal.SITE_OUTPUT_DIR / task_a.get_output_filename() - ) - task_b_output = ( - internal.CWD / internal.SITE_OUTPUT_DIR / task_b.get_output_filename() + f"{met_forcing_base_filename}_S{sci_conf_id}" + f"_R{repo_a.repo_id}_R{repo_b.repo_id}" ) - output_file = ( - internal.CWD - / internal.SITE_BITWISE_CMP_DIR - / f"{get_comparison_name(task_a, task_b)}.txt" - ) - if verbose: - print( - f"Comparing files {task_a_output.name} and {task_b_output.name} bitwise..." - ) - try: - subprocess.run_cmd( - f"nccmp -df {task_a_output} {task_b_output}", - capture_output=True, - verbose=verbose, - ) - print(f"Success: files {task_a_output.name} {task_b_output.name} are identical") - except CalledProcessError as exc: - with open(output_file, "w", encoding="utf-8") as file: - file.write(exc.stdout) - print( - f"Failure: files {task_a_output.name} {task_b_output.name} differ. 
" - f"Results of diff have been written to {output_file}" - ) diff --git a/benchcab/utils/logging.py b/benchcab/utils/logging.py new file mode 100644 index 00000000..06200a5c --- /dev/null +++ b/benchcab/utils/logging.py @@ -0,0 +1,26 @@ +"""Contains helper functions for logging.""" + +from pathlib import Path + + +def next_path(path: Path, path_pattern: str, sep: str = "-"): + """Finds the next free path in a sequentially named list of + files with the following pattern in the `path` directory: + + path_pattern = 'file{sep}*.suf': + + file-1.txt + file-2.txt + file-3.txt + """ + + loc_pattern = Path(path_pattern) + new_file_index = 1 + common_filename, _ = loc_pattern.stem.split(sep) + + pattern_files_sorted = sorted(path.glob(path_pattern)) + if pattern_files_sorted != []: + common_filename, last_file_index = pattern_files_sorted[-1].stem.split(sep) + new_file_index = int(last_file_index) + 1 + + return f"{common_filename}{sep}{new_file_index}{loc_pattern.suffix}" diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py new file mode 100644 index 00000000..193380aa --- /dev/null +++ b/benchcab/utils/pbs.py @@ -0,0 +1,52 @@ +"""Contains helper functions for manipulating PBS job scripts.""" + +from benchcab import internal + + +def render_job_script( + project: str, + config_path: str, + modules: list, + storage_flags: list, + verbose=False, + skip_bitwise_cmp=False, +) -> str: + """Returns a PBS job that executes all computationally expensive commands. + + This includes things such as running CABLE and running bitwise comparison jobs + between model output files. The PBS job script is written to the current + working directory as a side effect. + """ + + module_load_lines = "\n".join( + f"module load {module_name}" for module_name in modules + ) + verbose_flag = "-v" if verbose else "" + return f"""#!/bin/bash +#PBS -l wd +#PBS -l ncpus={internal.NCPUS} +#PBS -l mem={internal.MEM} +#PBS -l walltime={internal.WALL_TIME} +#PBS -q normal +#PBS -P {project} +#PBS -j oe +#PBS -m e +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/{project}+{'+'.join(storage_flags)} + +module purge +module use /g/data/hh5/public/modules +module load conda/analysis3-unstable +{module_load_lines} + +benchcab fluxnet-run-tasks --config={config_path} {verbose_flag} +if [ $? -ne 0 ]; then + echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' + exit 1 +fi +{'' if skip_bitwise_cmp else f''' +benchcab fluxnet-bitwise-cmp --config={config_path} {verbose_flag} +if [ $? -ne 0 ]; then + echo 'Error: benchcab fluxnet-bitwise-cmp failed. Exiting...' 
+ exit 1 +fi''' } +""" diff --git a/benchcab/utils/subprocess.py b/benchcab/utils/subprocess.py index 34228e30..c2450ecc 100644 --- a/benchcab/utils/subprocess.py +++ b/benchcab/utils/subprocess.py @@ -1,35 +1,57 @@ """A module containing utility functions that wraps around the `subprocess` module.""" +from abc import ABC as AbstractBaseClass, abstractmethod import subprocess import contextlib import pathlib from typing import Any, Optional -def run_cmd( - cmd: str, - capture_output: bool = False, - output_file: Optional[pathlib.Path] = None, - verbose: bool = False, -) -> subprocess.CompletedProcess: - """Helper function that wraps around `subprocess.run()`""" - - kwargs: Any = {} - with contextlib.ExitStack() as stack: - if capture_output: - kwargs["capture_output"] = True - kwargs["text"] = True - else: - if output_file: - kwargs["stdout"] = stack.enter_context( - output_file.open("w", encoding="utf-8") - ) +class SubprocessWrapperInterface(AbstractBaseClass): + """An abstract class (interface) that defines abstract methods for running + subprocess commands. + + An interface is defined so that we can easily mock the subprocess API in our + unit tests. + """ + + @abstractmethod + def run_cmd( + self, + cmd: str, + capture_output: bool = False, + output_file: Optional[pathlib.Path] = None, + verbose: bool = False, + ) -> subprocess.CompletedProcess: + """A wrapper around the `subprocess.run` function for executing system commands.""" + + +class SubprocessWrapper(SubprocessWrapperInterface): + """A concrete implementation of the `SubprocessWrapperInterface` abstract class.""" + + def run_cmd( + self, + cmd: str, + capture_output: bool = False, + output_file: Optional[pathlib.Path] = None, + verbose: bool = False, + ) -> subprocess.CompletedProcess: + kwargs: Any = {} + with contextlib.ExitStack() as stack: + if capture_output: + kwargs["text"] = True + kwargs["stdout"] = subprocess.PIPE else: - kwargs["stdout"] = None if verbose else subprocess.DEVNULL + if output_file: + kwargs["stdout"] = stack.enter_context( + output_file.open("w", encoding="utf-8") + ) + else: + kwargs["stdout"] = None if verbose else subprocess.DEVNULL kwargs["stderr"] = subprocess.STDOUT - if verbose: - print(cmd) - proc = subprocess.run(cmd, shell=True, check=True, **kwargs) + if verbose: + print(cmd) + proc = subprocess.run(cmd, shell=True, check=True, **kwargs) - return proc + return proc diff --git a/tests/common.py b/tests/common.py index 6673d053..ceeef428 100644 --- a/tests/common.py +++ b/tests/common.py @@ -1,14 +1,19 @@ """Helper functions for `pytest`.""" import os +from subprocess import CompletedProcess, CalledProcessError from pathlib import Path +from typing import Optional + +from benchcab.utils.subprocess import SubprocessWrapperInterface +from benchcab.environment_modules import EnvironmentModulesInterface MOCK_CWD = TMP_DIR = Path(os.environ["TMPDIR"], "benchcab_tests") -def make_barebones_config() -> dict: +def get_mock_config() -> dict: """Returns a valid mock config.""" - conf = { + config = { "project": "bar", "experiment": "five-site-test", "modules": [ @@ -51,4 +56,48 @@ def make_barebones_config() -> dict: }, ], } - return conf + return config + + +class MockSubprocessWrapper(SubprocessWrapperInterface): + """A mock implementation of `SubprocessWrapperInterface` used for testing.""" + + def __init__(self) -> None: + self.commands: list[str] = [] + self.stdout = "mock standard output" + self.error_on_call = False + + def run_cmd( + self, + cmd: str, + capture_output: bool = False, + 
output_file: Optional[Path] = None, + verbose: bool = False, + ) -> CompletedProcess: + self.commands.append(cmd) + if self.error_on_call: + raise CalledProcessError(returncode=1, cmd=cmd, output=self.stdout) + if output_file: + output_file.touch() + return CompletedProcess(cmd, returncode=0, stdout=self.stdout) + + +class MockEnvironmentModules(EnvironmentModulesInterface): + """A mock implementation of `EnvironmentModulesInterface` used for testing.""" + + def __init__(self) -> None: + self.commands: list[str] = [] + + def module_is_avail(self, *args: str) -> bool: + self.commands.append("module is-avail " + " ".join(args)) + return True + + def module_is_loaded(self, *args: str) -> bool: + self.commands.append("module is-loaded " + " ".join(args)) + return True + + def module_load(self, *args: str) -> None: + self.commands.append("module load " + " ".join(args)) + + def module_unload(self, *args: str) -> None: + self.commands.append("module unload " + " ".join(args)) diff --git a/tests/conftest.py b/tests/conftest.py index af8546d6..8b764969 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,6 @@ """Contains pytest fixtures accessible to all tests in the same directory.""" import shutil -import unittest.mock import pytest from .common import MOCK_CWD @@ -15,12 +14,9 @@ def run_around_tests(): if MOCK_CWD.exists(): shutil.rmtree(MOCK_CWD) MOCK_CWD.mkdir() - patcher = unittest.mock.patch("benchcab.internal.CWD", MOCK_CWD) - patcher.start() # Run the test: yield # Teardown: - patcher.stop() shutil.rmtree(MOCK_CWD) diff --git a/tests/test_bench_config.py b/tests/test_bench_config.py index c2639983..505e7103 100644 --- a/tests/test_bench_config.py +++ b/tests/test_bench_config.py @@ -5,7 +5,7 @@ import yaml from tests.common import TMP_DIR -from tests.common import make_barebones_config +from tests.common import get_mock_config from benchcab.bench_config import check_config, read_config from benchcab import internal @@ -14,93 +14,63 @@ def test_check_config(): """Tests for `check_config()`.""" # Success case: test barebones config is valid - config = make_barebones_config() + config = get_mock_config() check_config(config) # Success case: branch configuration with missing name key - config = make_barebones_config() + config = get_mock_config() config["realisations"][0].pop("name") check_config(config) # Success case: branch configuration with missing revision key - config = make_barebones_config() + config = get_mock_config() config["realisations"][0].pop("revision") check_config(config) # Success case: branch configuration with missing patch key - config = make_barebones_config() + config = get_mock_config() config["realisations"][0].pop("patch") check_config(config) # Success case: test config when realisations contains more than two keys - config = make_barebones_config() + config = get_mock_config() config["realisations"].append({"path": "path/to/my_new_branch"}) assert len(config["realisations"]) > 2 check_config(config) # Success case: test config when realisations contains less than two keys - config = make_barebones_config() + config = get_mock_config() config["realisations"].pop() assert len(config["realisations"]) < 2 check_config(config) # Success case: test experiment with site id from the # five-site-test is valid - config = make_barebones_config() + config = get_mock_config() config["experiment"] = "AU-Tum" check_config(config) # Success case: test config without science_configurations is valid - config = make_barebones_config() + config = get_mock_config() 
config.pop("science_configurations") check_config(config) - # Failure case: test config without project key raises an exception + # Failure case: test missing required keys raises an exception with pytest.raises( ValueError, - match="The config file does not list all required entries. " - "Those are: " + ", ".join(internal.CONFIG_REQUIRED_KEYS), + match="Keys are missing from the config file: project, experiment", ): - config = make_barebones_config() + config = get_mock_config() config.pop("project") - check_config(config) - - # Failure case: test config without realisations key raises an exception - with pytest.raises( - ValueError, - match="The config file does not list all required entries. " - "Those are: " + ", ".join(internal.CONFIG_REQUIRED_KEYS), - ): - config = make_barebones_config() - config.pop("realisations") + config.pop("experiment") check_config(config) # Failure case: test config with empty realisations key raises an exception with pytest.raises(ValueError, match="The 'realisations' key cannot be empty."): - config = make_barebones_config() + config = get_mock_config() config["realisations"] = [] check_config(config) - # Failure case: test config without modules key raises an exception - with pytest.raises( - ValueError, - match="The config file does not list all required entries. " - "Those are: " + ", ".join(internal.CONFIG_REQUIRED_KEYS), - ): - config = make_barebones_config() - config.pop("modules") - check_config(config) - - # Failure case: test config without experiment key raises an exception - with pytest.raises( - ValueError, - match="The config file does not list all required entries. " - "Those are: " + ", ".join(internal.CONFIG_REQUIRED_KEYS), - ): - config = make_barebones_config() - config.pop("experiment") - check_config(config) - # Failure case: test config with invalid experiment key raises an exception with pytest.raises( ValueError, @@ -111,7 +81,7 @@ def test_check_config(): + internal.MEORG_EXPERIMENTS["five-site-test"] ), ): - config = make_barebones_config() + config = get_mock_config() config["experiment"] = "foo" check_config(config) @@ -126,7 +96,7 @@ def test_check_config(): + internal.MEORG_EXPERIMENTS["five-site-test"] ), ): - config = make_barebones_config() + config = get_mock_config() config["experiment"] = "CH-Dav" check_config(config) @@ -134,7 +104,7 @@ def test_check_config(): with pytest.raises( ValueError, match="Realisation '1' must specify the `path` field." ): - config = make_barebones_config() + config = get_mock_config() config["realisations"][1].pop("path") check_config(config) @@ -143,25 +113,25 @@ def test_check_config(): with pytest.raises( ValueError, match="The 'science_configurations' key cannot be empty." 
): - config = make_barebones_config() + config = get_mock_config() config["science_configurations"] = [] check_config(config) # Failure case: project key is not a string with pytest.raises(TypeError, match="The 'project' key must be a string."): - config = make_barebones_config() + config = get_mock_config() config["project"] = 123 check_config(config) # Failure case: realisations key is not a list with pytest.raises(TypeError, match="The 'realisations' key must be a list."): - config = make_barebones_config() + config = get_mock_config() config["realisations"] = {"foo": "bar"} check_config(config) # Failure case: realisations key is not a list of dict with pytest.raises(TypeError, match="Realisation '0' must be a dictionary object."): - config = make_barebones_config() + config = get_mock_config() config["realisations"] = ["foo"] check_config(config) @@ -169,7 +139,7 @@ def test_check_config(): with pytest.raises( TypeError, match="The 'name' field in realisation '1' must be a string." ): - config = make_barebones_config() + config = get_mock_config() config["realisations"][1]["name"] = 1234 check_config(config) @@ -177,7 +147,7 @@ def test_check_config(): with pytest.raises( TypeError, match="The 'path' field in realisation '1' must be a string." ): - config = make_barebones_config() + config = get_mock_config() config["realisations"][1]["path"] = 1234 check_config(config) @@ -185,7 +155,7 @@ def test_check_config(): with pytest.raises( TypeError, match="The 'revision' field in realisation '1' must be an integer." ): - config = make_barebones_config() + config = get_mock_config() config["realisations"][1]["revision"] = "-1" check_config(config) @@ -195,7 +165,7 @@ def test_check_config(): match="The 'patch' field in realisation '1' must be a dictionary that is " "compatible with the f90nml python package.", ): - config = make_barebones_config() + config = get_mock_config() config["realisations"][1]["patch"] = r"cable_user%ENABLE_SOME_FEATURE = .FALSE." check_config(config) @@ -203,19 +173,19 @@ def test_check_config(): with pytest.raises( TypeError, match="The 'build_script' field in realisation '1' must be a string." ): - config = make_barebones_config() + config = get_mock_config() config["realisations"][1]["build_script"] = ["echo", "hello"] check_config(config) # Failure case: modules key is not a list with pytest.raises(TypeError, match="The 'modules' key must be a list."): - config = make_barebones_config() + config = get_mock_config() config["modules"] = "netcdf" check_config(config) # Failure case: experiment key is not a string with pytest.raises(TypeError, match="The 'experiment' key must be a string."): - config = make_barebones_config() + config = get_mock_config() config["experiment"] = 0 check_config(config) @@ -223,7 +193,7 @@ def test_check_config(): with pytest.raises( TypeError, match="The 'science_configurations' key must be a list." 
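        # a bare namelist string must be wrapped in a list to be accepted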
): - config = make_barebones_config() + config = get_mock_config() config["science_configurations"] = r"cable_user%GS_SWITCH = 'medlyn'" check_config(config) @@ -233,7 +203,7 @@ def test_check_config(): match="Science config settings must be specified using a dictionary " "that is compatible with the f90nml python package.", ): - config = make_barebones_config() + config = get_mock_config() config["science_configurations"] = [r"cable_user%GS_SWITCH = 'medlyn'"] check_config(config) @@ -242,7 +212,7 @@ def test_read_config(): """Tests for `read_config()`.""" # Success case: write config to file, then read config from file - config = make_barebones_config() + config = get_mock_config() filename = TMP_DIR / "config-barebones.yaml" with open(filename, "w", encoding="utf-8") as file: @@ -251,62 +221,3 @@ def test_read_config(): res = read_config(filename) os.remove(filename) assert config == res - - # Success case: a specified branch with a missing name key - # should return a config with name set to the base name of - # the path key - config = make_barebones_config() - config["realisations"][0].pop("name") - filename = TMP_DIR / "config-barebones.yaml" - - with open(filename, "w", encoding="utf-8") as file: - yaml.dump(config, file) - - res = read_config(filename) - os.remove(filename) - assert config != res - assert res["realisations"][0]["name"] == os.path.basename( - config["realisations"][0]["path"] - ) - - # Success case: a specified branch with a missing revision number - # should return a config with the default revision number - config = make_barebones_config() - config["realisations"][0].pop("revision") - filename = TMP_DIR / "config-barebones.yaml" - - with open(filename, "w", encoding="utf-8") as file: - yaml.dump(config, file) - - res = read_config(filename) - os.remove(filename) - assert config != res - assert res["realisations"][0]["revision"] == -1 - - # Success case: a specified branch with a missing patch dictionary - # should return a config with patch set to its default value - config = make_barebones_config() - config["realisations"][0].pop("patch") - filename = TMP_DIR / "config-barebones.yaml" - - with open(filename, "w", encoding="utf-8") as file: - yaml.dump(config, file) - - res = read_config(filename) - os.remove(filename) - assert config != res - assert res["realisations"][0]["patch"] == {} - - # Success case: a config with missing science_configurations key should return a - # config with config['science_configurations'] set to its default value - config = make_barebones_config() - config.pop("science_configurations") - filename = TMP_DIR / "config-barebones.yaml" - - with open(filename, "w", encoding="utf-8") as file: - yaml.dump(config, file) - - res = read_config(filename) - os.remove(filename) - assert config != res - assert res["science_configurations"] == internal.DEFAULT_SCIENCE_CONFIGURATIONS diff --git a/tests/test_benchcab.py b/tests/test_benchcab.py new file mode 100644 index 00000000..df84d518 --- /dev/null +++ b/tests/test_benchcab.py @@ -0,0 +1,61 @@ +"""`pytest` tests for benchcab.py""" + +import contextlib +import io +from subprocess import CalledProcessError +import pytest + +from benchcab.benchcab import Benchcab +from benchcab import internal +from benchcab.utils.subprocess import SubprocessWrapperInterface +from .common import MockSubprocessWrapper, get_mock_config, MOCK_CWD + + +def get_mock_app( + subprocess_handler: SubprocessWrapperInterface = MockSubprocessWrapper(), +) -> Benchcab: + """Returns a mock `Benchcab` instance for testing 
against.""" + config = get_mock_config() + app = Benchcab(argv=["benchcab", "fluxnet"], config=config, validate_env=False) + app.subprocess_handler = subprocess_handler + app.root_dir = MOCK_CWD + return app + + +def test_fluxnet_submit_job(): + """Tests for `Benchcab.fluxnet_submit_job()`.""" + + # Success case: test qsub command is executed + mock_subprocess = MockSubprocessWrapper() + app = get_mock_app(mock_subprocess) + app.fluxnet_submit_job() + assert f"qsub {MOCK_CWD / internal.QSUB_FNAME}" in mock_subprocess.commands + + # Success case: test non-verbose output + app = get_mock_app() + with contextlib.redirect_stdout(io.StringIO()) as buf: + app.fluxnet_submit_job() + assert buf.getvalue() == ( + "Creating PBS job script to run FLUXNET tasks on compute " + f"nodes: {internal.QSUB_FNAME}\n" + "PBS job submitted: mock standard output\n" + "The CABLE log file for each task is written to " + f"{internal.SITE_LOG_DIR}/_log.txt\n" + "The CABLE standard output for each task is written to " + f"{internal.SITE_TASKS_DIR}//out.txt\n" + "The NetCDF output for each task is written to " + f"{internal.SITE_OUTPUT_DIR}/_out.nc\n" + ) + + # Failure case: qsub non-zero exit code prints an error message + mock_subprocess = MockSubprocessWrapper() + mock_subprocess.error_on_call = True + app = get_mock_app(subprocess_handler=mock_subprocess) + with contextlib.redirect_stdout(io.StringIO()) as buf: + with pytest.raises(CalledProcessError): + app.fluxnet_submit_job() + assert buf.getvalue() == ( + "Creating PBS job script to run FLUXNET tasks on compute " + f"nodes: {internal.QSUB_FNAME}\n" + "Error when submitting job to NCI queue\nmock standard output\n" + ) diff --git a/tests/test_benchtree.py b/tests/test_benchtree.py index 1a9a5a48..d4b7a39a 100644 --- a/tests/test_benchtree.py +++ b/tests/test_benchtree.py @@ -7,8 +7,9 @@ from tests.common import MOCK_CWD -from tests.common import make_barebones_config +from tests.common import get_mock_config from benchcab.task import Task +from benchcab.repository import CableRepository from benchcab.benchtree import ( setup_fluxnet_directory_tree, clean_directory_tree, @@ -19,22 +20,23 @@ def setup_mock_tasks() -> list[Task]: """Return a mock list of fluxnet tasks.""" - config = make_barebones_config() - (branch_id_a, branch_a), (branch_id_b, branch_b) = enumerate(config["realisations"]) + config = get_mock_config() + repo_a = CableRepository("trunk", repo_id=0) + repo_b = CableRepository("path/to/my-branch", repo_id=1) met_site_a, met_site_b = "site_foo", "site_bar" (sci_id_a, sci_config_a), (sci_id_b, sci_config_b) = enumerate( config["science_configurations"] ) tasks = [ - Task(branch_id_a, branch_a["name"], {}, met_site_a, sci_id_a, sci_config_a), - Task(branch_id_a, branch_a["name"], {}, met_site_a, sci_id_b, sci_config_b), - Task(branch_id_a, branch_a["name"], {}, met_site_b, sci_id_a, sci_config_a), - Task(branch_id_a, branch_a["name"], {}, met_site_b, sci_id_b, sci_config_b), - Task(branch_id_b, branch_b["name"], {}, met_site_a, sci_id_a, sci_config_a), - Task(branch_id_b, branch_b["name"], {}, met_site_a, sci_id_b, sci_config_b), - Task(branch_id_b, branch_b["name"], {}, met_site_b, sci_id_a, sci_config_a), - Task(branch_id_b, branch_b["name"], {}, met_site_b, sci_id_b, sci_config_b), + Task(repo_a, met_site_a, sci_id_a, sci_config_a), + Task(repo_a, met_site_a, sci_id_b, sci_config_b), + Task(repo_a, met_site_b, sci_id_a, sci_config_a), + Task(repo_a, met_site_b, sci_id_b, sci_config_b), + Task(repo_b, met_site_a, sci_id_a, sci_config_a), + 
Task(repo_b, met_site_a, sci_id_b, sci_config_b), + Task(repo_b, met_site_b, sci_id_a, sci_config_a), + Task(repo_b, met_site_b, sci_id_b, sci_config_b), ] return tasks @@ -45,7 +47,7 @@ def test_setup_directory_tree(): # Success case: generate fluxnet directory structure tasks = setup_mock_tasks() - setup_fluxnet_directory_tree(fluxnet_tasks=tasks) + setup_fluxnet_directory_tree(fluxnet_tasks=tasks, root_dir=MOCK_CWD) assert len(list(MOCK_CWD.glob("*"))) == 1 assert Path(MOCK_CWD, "runs").exists() @@ -68,7 +70,7 @@ def test_setup_directory_tree(): # Success case: test non-verbose output with contextlib.redirect_stdout(io.StringIO()) as buf: - setup_fluxnet_directory_tree(fluxnet_tasks=tasks) + setup_fluxnet_directory_tree(fluxnet_tasks=tasks, root_dir=MOCK_CWD) assert buf.getvalue() == ( f"Creating runs/site/logs directory: {MOCK_CWD}/runs/site/logs\n" f"Creating runs/site/outputs directory: {MOCK_CWD}/runs/site/outputs\n" @@ -83,7 +85,9 @@ def test_setup_directory_tree(): # Success case: test verbose output with contextlib.redirect_stdout(io.StringIO()) as buf: - setup_fluxnet_directory_tree(fluxnet_tasks=tasks, verbose=True) + setup_fluxnet_directory_tree( + fluxnet_tasks=tasks, verbose=True, root_dir=MOCK_CWD + ) assert buf.getvalue() == ( f"Creating runs/site/logs directory: {MOCK_CWD}/runs/site/logs\n" f"Creating runs/site/outputs directory: {MOCK_CWD}/runs/site/outputs\n" @@ -110,13 +114,13 @@ def test_clean_directory_tree(): # Success case: directory tree does not exist after clean tasks = setup_mock_tasks() - setup_fluxnet_directory_tree(fluxnet_tasks=tasks) + setup_fluxnet_directory_tree(fluxnet_tasks=tasks, root_dir=MOCK_CWD) - clean_directory_tree() + clean_directory_tree(root_dir=MOCK_CWD) assert not Path(MOCK_CWD, "runs").exists() - setup_src_dir() - clean_directory_tree() + setup_src_dir(root_dir=MOCK_CWD) + clean_directory_tree(root_dir=MOCK_CWD) assert not Path(MOCK_CWD, "src").exists() @@ -124,5 +128,5 @@ def test_setup_src_dir(): """Tests for `setup_src_dir()`.""" # Success case: make src directory - setup_src_dir() + setup_src_dir(root_dir=MOCK_CWD) assert Path(MOCK_CWD, "src").exists() diff --git a/tests/test_build_cable.py b/tests/test_build_cable.py deleted file mode 100644 index 4f17d7d5..00000000 --- a/tests/test_build_cable.py +++ /dev/null @@ -1,167 +0,0 @@ -"""`pytest` tests for build_cable.py""" - -import unittest.mock -import io -import contextlib -import pytest - -from benchcab import internal -from benchcab.build_cable import remove_module_lines, default_build, custom_build -from .common import MOCK_CWD - - -def test_remove_module_lines(): - """Tests for `remove_module_lines()`.""" - - # Success case: test 'module' lines are removed from mock shell script - file_path = MOCK_CWD / "test-build.sh" - with open(file_path, "w", encoding="utf-8") as file: - file.write( - """#!/bin/bash -module add bar -module purge - -host_gadi() -{ - . /etc/bashrc - module purge - module add intel-compiler/2019.5.281 - module add netcdf/4.6.3 - module load foo - modules - echo foo && module load - echo foo # module load - # module load foo - - if [[ $1 = 'mpi' ]]; then - module add intel-mpi/2019.5.281 - fi -} -""" - ) - - remove_module_lines(file_path) - - with open(file_path, "r", encoding="utf-8") as file: - assert file.read() == ( - """#!/bin/bash - -host_gadi() -{ - . 
/etc/bashrc - modules - echo foo # module load - # module load foo - - if [[ $1 = 'mpi' ]]; then - fi -} -""" - ) - - -@unittest.mock.patch("benchcab.environment_modules.module_load") -@unittest.mock.patch("benchcab.environment_modules.module_unload") -def test_default_build( - mock_module_unload, mock_module_load -): # pylint: disable=unused-argument - """Tests for `default_build()`.""" - - build_script_path = ( - MOCK_CWD / internal.SRC_DIR / "test-branch" / "offline" / "build3.sh" - ) - build_script_path.parent.mkdir(parents=True) - build_script_path.touch() - - # Success case: execute the default build command - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - default_build("test-branch", ["foo", "bar"]) - mock_run_cmd.assert_called_once_with("./tmp-build3.sh", verbose=False) - - # Success case: execute the default build command with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - default_build("test-branch", ["foo", "bar"], verbose=True) - mock_run_cmd.assert_called_once_with("./tmp-build3.sh", verbose=True) - - # Success case: test non-verbose standard output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - default_build("test-branch", ["foo", "bar"]) - assert buf.getvalue() == ( - "Compiling CABLE serially for realisation test-branch...\n" - ) - - # Success case: test verbose standard output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - default_build("test-branch", ["foo", "bar"], verbose=True) - assert buf.getvalue() == ( - "Compiling CABLE serially for realisation test-branch...\n" - f"Copying {build_script_path} to {build_script_path.parent}/tmp-build3.sh\n" - f"chmod +x {build_script_path.parent}/tmp-build3.sh\n" - "Modifying tmp-build3.sh: remove lines that call environment " - "modules\n" - "Loading modules: foo bar\n" - "Unloading modules: foo bar\n" - ) - - # Failure case: cannot find default build script - build_script_path.unlink() - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with pytest.raises( - FileNotFoundError, - match=f"The default build script, {MOCK_CWD}/src/test-branch/offline/build3.sh, " - "could not be found. 
Do you need to specify a different build script with the " - "'build_script' option in config.yaml?", - ): - default_build("test-branch", ["foo", "bar"]) - - -def test_custom_build(): - """Tests for `custom_build()`.""" - - build_script_path = ( - MOCK_CWD / internal.SRC_DIR / "test-branch" / "offline" / "build3.sh" - ) - build_script_path.parent.mkdir(parents=True) - build_script_path.touch() - - # Success case: execute custom build command - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - custom_build(build_script_path, "test-branch") - mock_run_cmd.assert_called_once_with( - f"./{build_script_path.name}", verbose=False - ) - - # Success case: execute custom build command with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - custom_build(build_script_path, "test-branch", verbose=True) - mock_run_cmd.assert_called_once_with( - f"./{build_script_path.name}", verbose=True - ) - - # Success case: test non-verbose standard output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - custom_build(build_script_path, "test-branch") - assert buf.getvalue() == ( - "Compiling CABLE using custom build script for realisation test-branch...\n" - ) - - # Success case: test verbose standard output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - custom_build(build_script_path, "test-branch", verbose=True) - assert buf.getvalue() == ( - "Compiling CABLE using custom build script for realisation test-branch...\n" - ) diff --git a/tests/test_comparison.py b/tests/test_comparison.py new file mode 100644 index 00000000..496914cd --- /dev/null +++ b/tests/test_comparison.py @@ -0,0 +1,87 @@ +"""`pytest` tests for comparison.py""" + +import contextlib +import io + +from benchcab import internal +from benchcab.comparison import ComparisonTask +from benchcab.utils.subprocess import SubprocessWrapperInterface +from .common import MOCK_CWD, MockSubprocessWrapper + + +def get_mock_comparison_task( + subprocess_handler: SubprocessWrapperInterface = MockSubprocessWrapper(), +) -> ComparisonTask: + """Returns a mock `ComparisonTask` instance for testing against.""" + comparison_task = ComparisonTask( + files=(MOCK_CWD / "file_a.nc", MOCK_CWD / "file_b.nc"), + task_name="mock_comparison_task_name", + ) + comparison_task.subprocess_handler = subprocess_handler + comparison_task.root_dir = MOCK_CWD + return comparison_task + + +def test_run_comparison(): + """Tests for `run_comparison()`.""" + + file_a = MOCK_CWD / "file_a.nc" + file_b = MOCK_CWD / "file_b.nc" + bitwise_cmp_dir = MOCK_CWD / internal.SITE_BITWISE_CMP_DIR + bitwise_cmp_dir.mkdir(parents=True) + + # Success case: run comparison + mock_subprocess = MockSubprocessWrapper() + task = get_mock_comparison_task(subprocess_handler=mock_subprocess) + task.run() + assert f"nccmp -df {file_a} {file_b}" in mock_subprocess.commands + + # Success case: test non-verbose output + task = get_mock_comparison_task() + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.run() + assert ( + buf.getvalue() == f"Success: files {file_a.name} {file_b.name} are identical\n" + ) + + # Success case: test verbose output + task = get_mock_comparison_task() + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.run(verbose=True) + 
assert buf.getvalue() == ( + f"Comparing files {file_a.name} and {file_b.name} bitwise...\n" + f"Success: files {file_a.name} {file_b.name} are identical\n" + ) + + stdout_file = bitwise_cmp_dir / "mock_comparison_task_name.txt" + + # Failure case: test failed comparison check (files differ) + mock_subprocess = MockSubprocessWrapper() + mock_subprocess.error_on_call = True + task = get_mock_comparison_task(subprocess_handler=mock_subprocess) + task.run() + with open(stdout_file, "r", encoding="utf-8") as file: + assert file.read() == "mock standard output" + + # Failure case: test non-verbose standard output on failure + mock_subprocess = MockSubprocessWrapper() + mock_subprocess.error_on_call = True + task = get_mock_comparison_task(subprocess_handler=mock_subprocess) + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.run() + assert buf.getvalue() == ( + f"Failure: files {file_a.name} {file_b.name} differ. Results of diff " + f"have been written to {stdout_file}\n" + ) + + # Failure case: test verbose standard output on failure + mock_subprocess = MockSubprocessWrapper() + mock_subprocess.error_on_call = True + task = get_mock_comparison_task(subprocess_handler=mock_subprocess) + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.run(verbose=True) + assert buf.getvalue() == ( + f"Comparing files {file_a.name} and {file_b.name} bitwise...\n" + f"Failure: files {file_a.name} {file_b.name} differ. Results of diff " + f"have been written to {stdout_file}\n" + ) diff --git a/tests/test_get_cable.py b/tests/test_get_cable.py deleted file mode 100644 index f8073120..00000000 --- a/tests/test_get_cable.py +++ /dev/null @@ -1,250 +0,0 @@ -"""`pytest` tests for get_cable.py""" - -import unittest.mock -import shutil -import io -import contextlib -import pytest - -from benchcab import internal -from benchcab.get_cable import ( - checkout_cable, - checkout_cable_auxiliary, - svn_info_show_item, - next_path, -) -from .common import MOCK_CWD - - -def setup_mock_branch_config() -> dict: - """Returns a mock branch config.""" - return { - "name": "trunk", - "revision": 9000, - "path": "trunk", - "patch": {}, - "build_script": "", - } - - -def mock_svn_info_show_item(*args, **kwargs): # pylint: disable=unused-argument - """Side effect function used to mock `svn_info_show_item()`""" - item = args[1] - return {"url": "/url/to/test-branch", "revision": "123"}[item] - - -def test_checkout_cable(): - """Tests for `checkout_cable()`.""" - - with unittest.mock.patch( - "benchcab.get_cable.svn_info_show_item", mock_svn_info_show_item - ): - # Success case: checkout mock branch repository from SVN - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - branch_config = setup_mock_branch_config() - assert checkout_cable(branch_config) == MOCK_CWD / "src" / "trunk" - mock_run_cmd.assert_called_once_with( - "svn checkout -r 9000 https://trac.nci.org.au/svn/cable/trunk " - f"{MOCK_CWD}/src/trunk", - verbose=False, - ) - - # Success case: checkout mock branch repository from SVN with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - branch_config = setup_mock_branch_config() - assert ( - checkout_cable(branch_config, verbose=True) - == MOCK_CWD / "src" / "trunk" - ) - mock_run_cmd.assert_called_once_with( - "svn checkout -r 9000 https://trac.nci.org.au/svn/cable/trunk " - f"{MOCK_CWD}/src/trunk", - 
verbose=True, - ) - - # Success case: specify default revision number - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - branch_config = setup_mock_branch_config() - branch_config["revision"] = -1 - assert checkout_cable(branch_config) == MOCK_CWD / "src" / "trunk" - mock_run_cmd.assert_called_once_with( - f"svn checkout https://trac.nci.org.au/svn/cable/trunk {MOCK_CWD}/src/trunk", - verbose=False, - ) - - # Success case: test non-verbose standard output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - checkout_cable(branch_config) - assert buf.getvalue() == "Successfully checked out trunk at revision 123\n" - - # Success case: test verbose standard output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - checkout_cable(branch_config, verbose=True) - assert buf.getvalue() == "Successfully checked out trunk at revision 123\n" - - -def test_checkout_cable_auxiliary(): - """Tests for `checkout_cable_auxiliary()`.""" - - grid_file_path = MOCK_CWD / internal.GRID_FILE - phen_file_path = MOCK_CWD / internal.PHEN_FILE - cnpbiome_file_path = MOCK_CWD / internal.CNPBIOME_FILE - - # Generate mock files in CABLE-AUX as a side effect - def touch_files(*args, **kwargs): # pylint: disable=unused-argument - grid_file_path.parent.mkdir(parents=True, exist_ok=True) - grid_file_path.touch() - phen_file_path.parent.mkdir(parents=True, exist_ok=True) - phen_file_path.touch() - cnpbiome_file_path.parent.mkdir(parents=True, exist_ok=True) - cnpbiome_file_path.touch() - - with unittest.mock.patch( - "benchcab.get_cable.svn_info_show_item", mock_svn_info_show_item - ): - # Success case: checkout CABLE-AUX repository - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = touch_files - checkout_cable_auxiliary() - mock_run_cmd.assert_called_once_with( - "svn checkout https://trac.nci.org.au/svn/cable/branches/Share/CABLE-AUX " - f"{MOCK_CWD}/{internal.CABLE_AUX_DIR}", - verbose=False, - ) - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - # Success case: checkout CABLE-AUX repository with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = touch_files - checkout_cable_auxiliary(verbose=True) - mock_run_cmd.assert_called_once_with( - "svn checkout https://trac.nci.org.au/svn/cable/branches/Share/CABLE-AUX " - f"{MOCK_CWD}/{internal.CABLE_AUX_DIR}", - verbose=True, - ) - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - # Success case: test non-verbose standard output - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = touch_files - with contextlib.redirect_stdout(io.StringIO()) as buf: - checkout_cable_auxiliary() - assert ( - buf.getvalue() == "Successfully checked out CABLE-AUX at revision 123\n" - ) - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - # Success case: test verbose standard output - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = touch_files - with contextlib.redirect_stdout(io.StringIO()) as buf: - checkout_cable_auxiliary(verbose=True) - assert ( - buf.getvalue() == "Successfully checked 
out CABLE-AUX at revision 123\n" - ) - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - # Failure case: missing grid file in CABLE-AUX repository - touch_files() - grid_file_path.unlink() - with pytest.raises( - RuntimeError, - match=f"Error checking out CABLE-AUX: cannot find file '{internal.GRID_FILE}'", - ): - checkout_cable_auxiliary() - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - # Failure case: missing phen file in CABLE-AUX repository - touch_files() - phen_file_path.unlink() - with pytest.raises( - RuntimeError, - match=f"Error checking out CABLE-AUX: cannot find file '{internal.PHEN_FILE}'", - ): - checkout_cable_auxiliary() - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - # Failure case: missing cnpbiome file in CABLE-AUX repository - touch_files() - cnpbiome_file_path.unlink() - with pytest.raises( - RuntimeError, - match=f"Error checking out CABLE-AUX: cannot find file '{internal.CNPBIOME_FILE}'", - ): - checkout_cable_auxiliary() - shutil.rmtree(MOCK_CWD / internal.CABLE_AUX_DIR) - - -def test_svn_info_show_item(): - """Tests for `svn_info_show_item()`.""" - - # Success case: run command for mock item - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd, unittest.mock.patch( - "subprocess.CompletedProcess", autospec=True - ) as mock_completed_process: - mock_completed_process.configure_mock(stdout="standard output from command") - mock_run_cmd.return_value = mock_completed_process - assert ( - svn_info_show_item("foo", "some-mock-item") - == "standard output from command" - ) - mock_run_cmd.assert_called_once_with( - "svn info --show-item some-mock-item foo", capture_output=True - ) - - # Success case: test leading and trailing white space is removed from standard output - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd, unittest.mock.patch( - "subprocess.CompletedProcess", autospec=True - ) as mock_completed_process: - mock_completed_process.configure_mock( - stdout=" standard output from command\n " - ) - mock_run_cmd.return_value = mock_completed_process - assert ( - svn_info_show_item("foo", "some-mock-item") - == "standard output from command" - ) - mock_run_cmd.assert_called_once_with( - "svn info --show-item some-mock-item foo", capture_output=True - ) - - -def test_next_path(): - """Tests for `next_path()`.""" - - pattern = "rev_number-*.log" - - # Success case: get next path in 'empty' CWD - assert len(list(MOCK_CWD.glob(pattern))) == 0 - ret = next_path(pattern) - assert ret == "rev_number-1.log" - - # Success case: get next path in 'non-empty' CWD - ret_path = MOCK_CWD / ret - ret_path.touch() - assert len(list(MOCK_CWD.glob(pattern))) == 1 - ret = next_path(pattern) - assert ret == "rev_number-2.log" diff --git a/tests/test_job_script.py b/tests/test_job_script.py deleted file mode 100644 index 58a4655d..00000000 --- a/tests/test_job_script.py +++ /dev/null @@ -1,248 +0,0 @@ -"""`pytest` tests for job_script.py""" - -import unittest.mock -import io -import subprocess -import contextlib -from pathlib import Path -import pytest - -from benchcab import internal -from benchcab.job_script import get_local_storage_flag, submit_job -from .common import MOCK_CWD - - -def test_get_local_storage_flag(): - """Tests for `get_local_storage_flag()`.""" - - # Success case: scratch dir storage flag - assert get_local_storage_flag(Path("/scratch/tm70/foo")) == "scratch/tm70" - - # Success case: gdata 
storage flag - assert get_local_storage_flag(Path("/g/data/tm70/foo")) == "gdata/tm70" - - # Failure case: invalid path - with pytest.raises( - RuntimeError, match="Current directory structure unknown on Gadi." - ): - get_local_storage_flag(Path("/home/189/foo")) - - -def test_submit_job(): - """Tests for `submit_job()`.""" - - # Success case: test qsub command is executed - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag", autospec=True - ) as mock_get_local_storage_flag, unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_get_local_storage_flag.return_value = "storage_flag" - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - ) - mock_run_cmd.assert_called_once_with( - f"qsub {MOCK_CWD/internal.QSUB_FNAME}", capture_output=True, verbose=False - ) - - # Success case: test qsub command is executed with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag", autospec=True - ) as mock_get_local_storage_flag, unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_get_local_storage_flag.return_value = "storage_flag" - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - verbose=True, - ) - mock_run_cmd.assert_called_once_with( - f"qsub {MOCK_CWD/internal.QSUB_FNAME}", capture_output=True, verbose=True - ) - - # Success case: test default job script generated is correct - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag", autospec=True - ) as mock_get_local_storage_flag, unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", - ): - mock_get_local_storage_flag.return_value = "storage_flag" - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - ) - with open(MOCK_CWD / internal.QSUB_FNAME, "r", encoding="utf-8") as file: - assert ( - file.read() - == f"""#!/bin/bash -#PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} -#PBS -q normal -#PBS -P tm70 -#PBS -j oe -#PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+storage_flag - -module purge -module use /g/data/hh5/public/modules -module load conda/analysis3-unstable -module load foo -module load bar -module load baz - -benchcab fluxnet-run-tasks --config=/path/to/config.yaml -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' - exit 1 -fi - -benchcab fluxnet-bitwise-cmp --config=/path/to/config.yaml -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-bitwise-cmp failed. Exiting...' 
- exit 1 -fi -""" - ) - - # Success case: skip fluxnet-bitwise-cmp step - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag", autospec=True - ) as mock_get_local_storage_flag, unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd" - ): - mock_get_local_storage_flag.return_value = "storage_flag" - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - skip_bitwise_cmp=True, - ) - with open(MOCK_CWD / internal.QSUB_FNAME, "r", encoding="utf-8") as file: - assert ( - file.read() - == f"""#!/bin/bash -#PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} -#PBS -q normal -#PBS -P tm70 -#PBS -j oe -#PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+storage_flag - -module purge -module use /g/data/hh5/public/modules -module load conda/analysis3-unstable -module load foo -module load bar -module load baz - -benchcab fluxnet-run-tasks --config=/path/to/config.yaml -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' - exit 1 -fi - -""" - ) - - # Success case: test non-verbose output - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag" - ), unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd, unittest.mock.patch( - "subprocess.CompletedProcess", autospec=True - ) as mock_completed_process: - mock_completed_process.configure_mock(stdout="standard output from qsub") - mock_run_cmd.return_value = mock_completed_process - with contextlib.redirect_stdout(io.StringIO()) as buf: - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - ) - assert buf.getvalue() == ( - "Creating PBS job script to run FLUXNET tasks on compute " - f"nodes: {internal.QSUB_FNAME}\n" - "PBS job submitted: standard output from qsub\n" - ) - - # Success case: add verbose flag to job script - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag", autospec=True - ) as mock_get_local_storage_flag, unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd" - ): - mock_get_local_storage_flag.return_value = "storage_flag" - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - verbose=True, - ) - with open(MOCK_CWD / internal.QSUB_FNAME, "r", encoding="utf-8") as file: - assert ( - file.read() - == f"""#!/bin/bash -#PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} -#PBS -q normal -#PBS -P tm70 -#PBS -j oe -#PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+storage_flag - -module purge -module use /g/data/hh5/public/modules -module load conda/analysis3-unstable -module load foo -module load bar -module load baz - -benchcab fluxnet-run-tasks --config=/path/to/config.yaml -v -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' - exit 1 -fi - -benchcab fluxnet-bitwise-cmp --config=/path/to/config.yaml -v -if [ $? -ne 0 ]; then - echo 'Error: benchcab fluxnet-bitwise-cmp failed. Exiting...' 
- exit 1 -fi -""" - ) - - # Failure case: qsub non-zero exit code prints an error message - with unittest.mock.patch( - "benchcab.job_script.get_local_storage_flag" - ), unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = subprocess.CalledProcessError( - 1, "dummy-cmd", stderr="standard error from qsub" - ) - with contextlib.redirect_stdout(io.StringIO()) as buf: - with pytest.raises(subprocess.CalledProcessError): - submit_job( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - ) - assert buf.getvalue() == ( - "Creating PBS job script to run FLUXNET tasks on compute " - f"nodes: {internal.QSUB_FNAME}\n" - "Error when submitting job to NCI queue\nstandard error from qsub\n" - ) diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 00000000..8af56e01 --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,22 @@ +"""`pytest` tests for utils/logging.py""" + +from benchcab.utils.logging import next_path +from .common import MOCK_CWD + + +def test_next_path(): + """Tests for `next_path()`.""" + + pattern = "rev_number-*.log" + + # Success case: get next path in 'empty' CWD + assert len(list(MOCK_CWD.glob(pattern))) == 0 + ret = next_path(MOCK_CWD, pattern) + assert ret == "rev_number-1.log" + + # Success case: get next path in 'non-empty' CWD + ret_path = MOCK_CWD / ret + ret_path.touch() + assert len(list(MOCK_CWD.glob(pattern))) == 1 + ret = next_path(MOCK_CWD, pattern) + assert ret == "rev_number-2.log" diff --git a/tests/test_pbs.py b/tests/test_pbs.py new file mode 100644 index 00000000..711e47a1 --- /dev/null +++ b/tests/test_pbs.py @@ -0,0 +1,82 @@ +"""`pytest` tests for utils/pbs.py""" + +from benchcab.utils.pbs import render_job_script +from benchcab import internal + + +def test_render_job_script(): + """Tests for `render_job_script()`.""" + + # Success case: test default job script generated is correct + assert render_job_script( + project="tm70", + config_path="/path/to/config.yaml", + modules=["foo", "bar", "baz"], + storage_flags=["scratch/tm70"], + ) == ( + f"""#!/bin/bash +#PBS -l wd +#PBS -l ncpus={internal.NCPUS} +#PBS -l mem={internal.MEM} +#PBS -l walltime={internal.WALL_TIME} +#PBS -q normal +#PBS -P tm70 +#PBS -j oe +#PBS -m e +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+scratch/tm70 + +module purge +module use /g/data/hh5/public/modules +module load conda/analysis3-unstable +module load foo +module load bar +module load baz + +benchcab fluxnet-run-tasks --config=/path/to/config.yaml +if [ $? -ne 0 ]; then + echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' + exit 1 +fi + +benchcab fluxnet-bitwise-cmp --config=/path/to/config.yaml +if [ $? -ne 0 ]; then + echo 'Error: benchcab fluxnet-bitwise-cmp failed. Exiting...' + exit 1 +fi +""" + ) + + # Success case: skip fluxnet-bitwise-cmp step + assert render_job_script( + project="tm70", + config_path="/path/to/config.yaml", + modules=["foo", "bar", "baz"], + storage_flags=["scratch/tm70"], + skip_bitwise_cmp=True, + ) == ( + f"""#!/bin/bash +#PBS -l wd +#PBS -l ncpus={internal.NCPUS} +#PBS -l mem={internal.MEM} +#PBS -l walltime={internal.WALL_TIME} +#PBS -q normal +#PBS -P tm70 +#PBS -j oe +#PBS -m e +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+scratch/tm70 + +module purge +module use /g/data/hh5/public/modules +module load conda/analysis3-unstable +module load foo +module load bar +module load baz + +benchcab fluxnet-run-tasks --config=/path/to/config.yaml +if [ $? 
-ne 0 ]; then + echo 'Error: benchcab fluxnet-run-tasks failed. Exiting...' + exit 1 +fi + +""" + ) diff --git a/tests/test_repository.py b/tests/test_repository.py new file mode 100644 index 00000000..ad10b22c --- /dev/null +++ b/tests/test_repository.py @@ -0,0 +1,239 @@ +"""`pytest` tests for repository.py""" + +import io +import contextlib +import pytest + +from benchcab import internal +from benchcab.environment_modules import EnvironmentModulesInterface +from benchcab.utils.subprocess import SubprocessWrapperInterface +from benchcab.repository import CableRepository, remove_module_lines +from .common import MOCK_CWD, MockEnvironmentModules, MockSubprocessWrapper + + +def get_mock_repo( + subprocess_handler: SubprocessWrapperInterface = MockSubprocessWrapper(), + modules_handler: EnvironmentModulesInterface = MockEnvironmentModules(), +) -> CableRepository: + """Returns a mock `CableRepository` instance for testing against.""" + repo = CableRepository(path="trunk") + repo.root_dir = MOCK_CWD + repo.subprocess_handler = subprocess_handler + repo.modules_handler = modules_handler + return repo + + +def test_repo_id(): + """Tests for `CableRepository.repo_id`.""" + + # Success case: get repository ID + repo = CableRepository("path/to/repo", repo_id=123) + assert repo.repo_id == 123 + + # Success case: set repository ID + repo = CableRepository("path/to/repo", repo_id=123) + repo.repo_id = 456 + assert repo.repo_id == 456 + + # Failure case: access undefined repository ID + repo = CableRepository("path/to/repo") + with pytest.raises(RuntimeError, match="Attempting to access undefined repo ID"): + _ = repo.repo_id + + +def test_checkout(): + """Tests for `CableRepository.checkout()`.""" + + # Success case: checkout mock repository + mock_subprocess = MockSubprocessWrapper() + repo = get_mock_repo(mock_subprocess) + repo.checkout() + assert ( + f"svn checkout https://trac.nci.org.au/svn/cable/trunk {MOCK_CWD}/src/trunk" + in mock_subprocess.commands + ) + + # Success case: checkout mock repository with specified revision number + mock_subprocess = MockSubprocessWrapper() + repo = get_mock_repo(mock_subprocess) + repo.revision = 9000 + repo.checkout() + assert ( + f"svn checkout -r 9000 https://trac.nci.org.au/svn/cable/trunk {MOCK_CWD}/src/trunk" + in mock_subprocess.commands + ) + + # Success case: test non-verbose standard output + mock_subprocess = MockSubprocessWrapper() + repo = get_mock_repo(mock_subprocess) + with contextlib.redirect_stdout(io.StringIO()) as buf: + repo.checkout() + assert ( + buf.getvalue() + == f"Successfully checked out trunk at revision {mock_subprocess.stdout}\n" + ) + + # Success case: test verbose standard output + mock_subprocess = MockSubprocessWrapper() + repo = get_mock_repo(mock_subprocess) + with contextlib.redirect_stdout(io.StringIO()) as buf: + repo.checkout(verbose=True) + assert ( + buf.getvalue() + == f"Successfully checked out trunk at revision {mock_subprocess.stdout}\n" + ) + + +def test_svn_info_show_item(): + """Tests for `CableRepository.svn_info_show_item()`.""" + + # Success case: call svn info command and get result + mock_subprocess = MockSubprocessWrapper() + mock_subprocess.stdout = "mock standard output" + repo = get_mock_repo(mock_subprocess) + assert repo.svn_info_show_item("some-mock-item") == "mock standard output" + assert ( + f"svn info --show-item some-mock-item {MOCK_CWD}/src/trunk" + in mock_subprocess.commands + ) + + # Success case: test leading and trailing white space is removed from standard output + mock_subprocess = 
MockSubprocessWrapper() + mock_subprocess.stdout = " \n\n mock standard output \n\n" + repo = get_mock_repo(mock_subprocess) + assert repo.svn_info_show_item("some-mock-item") == "mock standard output" + assert ( + f"svn info --show-item some-mock-item {MOCK_CWD}/src/trunk" + in mock_subprocess.commands + ) + + +def test_build(): + """Tests for `CableRepository.build()`.""" + build_script_path = MOCK_CWD / internal.SRC_DIR / "trunk" / "offline" / "build3.sh" + build_script_path.parent.mkdir(parents=True) + build_script_path.touch() + + # Success case: execute the default build command + mock_subprocess = MockSubprocessWrapper() + mock_modules = MockEnvironmentModules() + repo = get_mock_repo(mock_subprocess, mock_modules) + repo.build(["foo", "bar"]) + assert "./tmp-build3.sh" in mock_subprocess.commands + assert "module load foo bar" in mock_modules.commands + assert "module unload foo bar" in mock_modules.commands + + # Success case: test non-verbose standard output + repo = get_mock_repo() + with contextlib.redirect_stdout(io.StringIO()) as buf: + repo.build(["foo", "bar"]) + assert buf.getvalue() == ("Compiling CABLE serially for realisation trunk...\n") + + # Success case: test verbose standard output + repo = get_mock_repo() + with contextlib.redirect_stdout(io.StringIO()) as buf: + repo.build(["foo", "bar"], verbose=True) + assert buf.getvalue() == ( + "Compiling CABLE serially for realisation trunk...\n" + f"Copying {build_script_path} to {build_script_path.parent}/tmp-build3.sh\n" + f"chmod +x {build_script_path.parent}/tmp-build3.sh\n" + "Modifying tmp-build3.sh: remove lines that call environment " + "modules\n" + "Loading modules: foo bar\n" + "Unloading modules: foo bar\n" + ) + + # Failure case: cannot find default build script + build_script_path.unlink() + repo = get_mock_repo() + with pytest.raises( + FileNotFoundError, + match=f"The default build script, {MOCK_CWD}/src/trunk/offline/build3.sh, " + "could not be found. 
Do you need to specify a different build script with the " + "'build_script' option in config.yaml?", + ): + repo.build(["foo", "bar"]) + + +def test_custom_build(): + """Tests for `custom_build()`.""" + + build_script = "offline/build.sh" + build_script_path = MOCK_CWD / internal.SRC_DIR / "trunk" / build_script + build_script_path.parent.mkdir(parents=True) + build_script_path.touch() + + # Success case: execute custom build command + mock_subprocess = MockSubprocessWrapper() + repo = get_mock_repo(subprocess_handler=mock_subprocess) + repo.build_script = build_script + repo.custom_build() + assert f"./{build_script_path.name}" in mock_subprocess.commands + + # Success case: test non-verbose standard output + repo = get_mock_repo() + repo.build_script = build_script + with contextlib.redirect_stdout(io.StringIO()) as buf: + repo.custom_build() + assert buf.getvalue() == ( + "Compiling CABLE using custom build script for realisation trunk...\n" + ) + + # Success case: test verbose standard output + repo = get_mock_repo() + repo.build_script = build_script + with contextlib.redirect_stdout(io.StringIO()) as buf: + repo.custom_build(verbose=True) + assert buf.getvalue() == ( + "Compiling CABLE using custom build script for realisation trunk...\n" + ) + + +def test_remove_module_lines(): + """Tests for `remove_module_lines()`.""" + + # Success case: test 'module' lines are removed from mock shell script + file_path = MOCK_CWD / "test-build.sh" + with open(file_path, "w", encoding="utf-8") as file: + file.write( + """#!/bin/bash +module add bar +module purge + +host_gadi() +{ + . /etc/bashrc + module purge + module add intel-compiler/2019.5.281 + module add netcdf/4.6.3 + module load foo + modules + echo foo && module load + echo foo # module load + # module load foo + + if [[ $1 = 'mpi' ]]; then + module add intel-mpi/2019.5.281 + fi +} +""" + ) + + remove_module_lines(file_path) + + with open(file_path, "r", encoding="utf-8") as file: + assert file.read() == ( + """#!/bin/bash + +host_gadi() +{ + . 
/etc/bashrc + modules + echo foo # module load + # module load foo + + if [[ $1 = 'mpi' ]]; then + fi +} +""" + ) diff --git a/tests/test_subprocess.py b/tests/test_subprocess.py index 02c68d41..5f75b39e 100644 --- a/tests/test_subprocess.py +++ b/tests/test_subprocess.py @@ -3,71 +3,89 @@ import subprocess import pytest -from benchcab.utils.subprocess import run_cmd +from benchcab.utils.subprocess import SubprocessWrapper from .common import TMP_DIR def test_run_cmd(capfd): """Tests for `run_cmd()`.""" + subprocess_handler = SubprocessWrapper() + # Success case: test stdout is suppressed in non-verbose mode - run_cmd("echo foo") + subprocess_handler.run_cmd("echo foo") captured = capfd.readouterr() - assert captured.out == "" - assert captured.err == "" + assert not captured.out + assert not captured.err # Success case: test stderr is suppressed in non-verbose mode - run_cmd("echo foo 1>&2") + subprocess_handler.run_cmd("echo foo 1>&2") captured = capfd.readouterr() - assert captured.out == "" - assert captured.err == "" + assert not captured.out + assert not captured.err # Success case: test stdout is printed in verbose mode - run_cmd("echo foo", verbose=True) + subprocess_handler.run_cmd("echo foo", verbose=True) captured = capfd.readouterr() assert captured.out == "echo foo\nfoo\n" - assert captured.err == "" + assert not captured.err # Success case: test stderr is redirected to stdout in verbose mode - run_cmd("echo foo 1>&2", verbose=True) + subprocess_handler.run_cmd("echo foo 1>&2", verbose=True) captured = capfd.readouterr() assert captured.out == "echo foo 1>&2\nfoo\n" - assert captured.err == "" + assert not captured.err # Success case: test output is captured with capture_output enabled - proc = run_cmd("echo foo", capture_output=True) + proc = subprocess_handler.run_cmd("echo foo", capture_output=True) + captured = capfd.readouterr() + assert not captured.out + assert not captured.err + assert proc.stdout == "foo\n" + assert not proc.stderr + + # Success case: test stderr is captured and redirected to stdout with + # capture_output enabled + proc = subprocess_handler.run_cmd("echo foo 1>&2", capture_output=True) captured = capfd.readouterr() - assert captured.out == "" - assert captured.err == "" + assert not captured.out + assert not captured.err assert proc.stdout == "foo\n" - assert proc.stderr == "" + assert not proc.stderr # Success case: test command is printed in verbose mode - proc = run_cmd("echo foo", capture_output=True, verbose=True) + proc = subprocess_handler.run_cmd("echo foo", capture_output=True, verbose=True) captured = capfd.readouterr() assert captured.out == "echo foo\n" - assert captured.err == "" + assert not captured.err assert proc.stdout == "foo\n" - assert proc.stderr == "" + assert not proc.stderr # Success case: redirect output to file descriptor file_path = TMP_DIR / "out.txt" - run_cmd("echo foo", output_file=file_path) + subprocess_handler.run_cmd("echo foo", output_file=file_path) with open(file_path, "r", encoding="utf-8") as file: assert file.read() == "foo\n" captured = capfd.readouterr() - assert captured.out == "" - assert captured.err == "" + assert not captured.out + assert not captured.err # Success case: redirect output to file descriptor in verbose mode file_path = TMP_DIR / "out.txt" - run_cmd("echo foo", output_file=file_path, verbose=True) - with open(file_path, "r", encoding="utf-8") as file: + subprocess_handler.run_cmd("echo foo", output_file=file_path, verbose=True) + with file_path.open("r", encoding="utf-8") as file: assert 
file.read() == "foo\n" captured = capfd.readouterr() assert captured.out == "echo foo\n" - assert captured.err == "" + assert not captured.err # Failure case: check non-zero return code throws an exception with pytest.raises(subprocess.CalledProcessError): - run_cmd("exit 1") + subprocess_handler.run_cmd("exit 1") + + # Failure case: check stderr is redirected to stdout on non-zero + # return code + with pytest.raises(subprocess.CalledProcessError) as exc: + subprocess_handler.run_cmd("echo foo 1>&2; exit 1", capture_output=True) + assert exc.value.stdout == "foo\n" + assert not exc.value.stderr diff --git a/tests/test_task.py b/tests/test_task.py index 9a183fe9..bb3ec5ab 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -1,7 +1,5 @@ """`pytest` tests for task.py""" -import unittest.mock -import subprocess from pathlib import Path import io import contextlib @@ -14,25 +12,37 @@ get_fluxnet_tasks, get_fluxnet_comparisons, get_comparison_name, - run_comparison, Task, CableError, ) from benchcab import internal -from benchcab.benchtree import setup_fluxnet_directory_tree -from .common import MOCK_CWD, make_barebones_config +from benchcab.repository import CableRepository +from benchcab.utils.subprocess import SubprocessWrapperInterface +from .common import MOCK_CWD, get_mock_config, MockSubprocessWrapper -def setup_mock_task() -> Task: +def get_mock_task( + subprocess_handler: SubprocessWrapperInterface = MockSubprocessWrapper(), +) -> Task: """Returns a mock `Task` instance.""" + + repo = CableRepository( + repo_id=1, + path="path/to/test-branch", + patch={"cable": {"some_branch_specific_setting": True}}, + ) + repo.subprocess_handler = subprocess_handler + repo.root_dir = MOCK_CWD + task = Task( - branch_id=1, - branch_name="test-branch", - branch_patch={"cable": {"some_branch_specific_setting": True}}, + repo=repo, met_forcing_file="forcing-file.nc", sci_conf_id=0, sci_config={"cable": {"some_setting": True}}, ) + task.subprocess_handler = subprocess_handler + task.root_dir = MOCK_CWD + return task @@ -55,6 +65,16 @@ def setup_mock_namelists_directory(): assert cable_vegetation_nml_path.exists() +def setup_mock_run_directory(task: Task): + """Setup mock run directory for a single task.""" + task_dir = MOCK_CWD / internal.SITE_TASKS_DIR / task.get_task_name() + task_dir.mkdir(parents=True) + output_dir = MOCK_CWD / internal.SITE_OUTPUT_DIR + output_dir.mkdir(parents=True) + log_dir = MOCK_CWD / internal.SITE_LOG_DIR + log_dir.mkdir(parents=True) + + def do_mock_checkout_and_build(): """Setup mock repository that has been checked out and built.""" Path(MOCK_CWD, internal.SRC_DIR, "test-branch", "offline").mkdir(parents=True) @@ -80,21 +100,21 @@ def do_mock_run(task: Task): def test_get_task_name(): """Tests for `get_task_name()`.""" # Success case: check task name convention - task = setup_mock_task() + task = get_mock_task() assert task.get_task_name() == "forcing-file_R1_S0" def test_get_log_filename(): """Tests for `get_log_filename()`.""" # Success case: check log file name convention - task = setup_mock_task() + task = get_mock_task() assert task.get_log_filename() == "forcing-file_R1_S0_log.txt" def test_get_output_filename(): """Tests for `get_output_filename()`.""" # Success case: check output file name convention - task = setup_mock_task() + task = get_mock_task() assert task.get_output_filename() == "forcing-file_R1_S0_out.nc" @@ -102,10 +122,10 @@ def test_fetch_files(): """Tests for `fetch_files()`.""" # Success case: fetch files required to run CABLE - task = 
setup_mock_task() + task = get_mock_task() setup_mock_namelists_directory() - setup_fluxnet_directory_tree([task]) + setup_mock_run_directory(task) do_mock_checkout_and_build() task.fetch_files() @@ -131,10 +151,10 @@ def test_clean_task(): """Tests for `clean_task()`.""" # Success case: fetch then clean files - task = setup_mock_task() + task = get_mock_task() setup_mock_namelists_directory() - setup_fluxnet_directory_tree([task]) + setup_mock_run_directory(task) do_mock_checkout_and_build() task.fetch_files() @@ -198,11 +218,11 @@ def test_patch_namelist(): def test_setup_task(): """Tests for `setup_task()`.""" - task = setup_mock_task() + task = get_mock_task() task_dir = Path(MOCK_CWD, internal.SITE_TASKS_DIR, task.get_task_name()) setup_mock_namelists_directory() - setup_fluxnet_directory_tree([task]) + setup_mock_run_directory(task) do_mock_checkout_and_build() # Success case: test all settings are patched into task namelist file @@ -256,7 +276,8 @@ def test_setup_task(): def test_run_cable(): """Tests for `run_cable()`.""" - task = setup_mock_task() + mock_subprocess = MockSubprocessWrapper() + task = get_mock_task(subprocess_handler=mock_subprocess) task_dir = MOCK_CWD / internal.SITE_TASKS_DIR / task.get_task_name() task_dir.mkdir(parents=True) exe_path = task_dir / internal.CABLE_EXE @@ -267,55 +288,31 @@ def test_run_cable(): stdout_file = task_dir / internal.CABLE_STDOUT_FILENAME # Success case: run CABLE executable in subprocess - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - task.run_cable() - mock_run_cmd.assert_called_once_with( - f"{exe_path} {nml_path}", - output_file=stdout_file, - verbose=False, - ) - - # Success case: run CABLE executable in subprocess with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - task.run_cable(verbose=True) - mock_run_cmd.assert_called_once_with( - f"{exe_path} {nml_path}", - output_file=stdout_file, - verbose=True, - ) + task.run_cable() + assert f"{exe_path} {nml_path}" in mock_subprocess.commands + assert stdout_file.exists() # Success case: test non-verbose output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - task.run_cable() - assert not buf.getvalue() + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.run_cable() + assert not buf.getvalue() # Success case: test verbose output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - task.run_cable(verbose=True) - assert not buf.getvalue() + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.run_cable(verbose=True) + assert not buf.getvalue() # Failure case: raise CableError on subprocess non-zero exit code - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = subprocess.CalledProcessError( - returncode=1, cmd="cmd" - ) - with pytest.raises(CableError): - task.run_cable() + mock_subprocess.error_on_call = True + with pytest.raises(CableError): + task.run_cable() def test_add_provenance_info(): """Tests for `add_provenance_info()`.""" - task = setup_mock_task() + mock_subprocess = MockSubprocessWrapper() + task = get_mock_task(subprocess_handler=mock_subprocess) task_dir = MOCK_CWD / internal.SITE_TASKS_DIR / task.get_task_name() 
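    # create the task and output directories that add_provenance_info() expects to exist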
task_dir.mkdir(parents=True) site_output_dir = MOCK_CWD / internal.SITE_OUTPUT_DIR @@ -331,35 +328,23 @@ def test_add_provenance_info(): nc_output_path = site_output_dir / task.get_output_filename() netCDF4.Dataset(nc_output_path, "w") - def mock_svn_info_show_item(*args, **kwargs): # pylint: disable=unused-argument - item = args[1] - return {"url": "/url/to/test-branch", "revision": "123"}[item] - # Success case: add global attributes to netcdf file - with unittest.mock.patch( - "benchcab.get_cable.svn_info_show_item", mock_svn_info_show_item - ): - task.add_provenance_info() - + task.add_provenance_info() with netCDF4.Dataset(str(nc_output_path), "r") as nc_output: atts = vars(nc_output) - assert atts["cable_branch"] == "/url/to/test-branch" - assert atts["svn_revision_number"] == "123" + assert atts["cable_branch"] == "mock standard output" + assert atts["svn_revision_number"] == "mock standard output" assert atts[r"filename%met"] == "/path/to/met/file" assert atts[r"filename%foo"] == 123 assert atts[r"bar"] == ".true." # Success case: test non-verbose output - with unittest.mock.patch( - "benchcab.get_cable.svn_info_show_item", mock_svn_info_show_item - ), contextlib.redirect_stdout(io.StringIO()) as buf: + with contextlib.redirect_stdout(io.StringIO()) as buf: task.add_provenance_info() assert not buf.getvalue() # Success case: test verbose output - with unittest.mock.patch( - "benchcab.get_cable.svn_info_show_item", mock_svn_info_show_item - ), contextlib.redirect_stdout(io.StringIO()) as buf: + with contextlib.redirect_stdout(io.StringIO()) as buf: task.add_provenance_info(verbose=True) assert buf.getvalue() == ( " Adding attributes to output file: " @@ -372,167 +357,106 @@ def test_get_fluxnet_tasks(): # Success case: get task list for two branches, two met # sites and two science configurations - config = make_barebones_config() - branch_a, branch_b = config["realisations"] + config = get_mock_config() + repos = [ + CableRepository(**branch_config, repo_id=id) + for id, branch_config in enumerate(config["realisations"]) + ] met_site_a, met_site_b = "foo", "bar" sci_a, sci_b = config["science_configurations"] - - assert get_fluxnet_tasks( - config["realisations"], + tasks = get_fluxnet_tasks( + repos, config["science_configurations"], [met_site_a, met_site_b], - ) == [ - Task(0, branch_a["name"], branch_a["patch"], met_site_a, 0, sci_a), - Task(0, branch_a["name"], branch_a["patch"], met_site_a, 1, sci_b), - Task(0, branch_a["name"], branch_a["patch"], met_site_b, 0, sci_a), - Task(0, branch_a["name"], branch_a["patch"], met_site_b, 1, sci_b), - Task(1, branch_b["name"], branch_b["patch"], met_site_a, 0, sci_a), - Task(1, branch_b["name"], branch_b["patch"], met_site_a, 1, sci_b), - Task(1, branch_b["name"], branch_b["patch"], met_site_b, 0, sci_a), - Task(1, branch_b["name"], branch_b["patch"], met_site_b, 1, sci_b), + ) + assert [(task.repo, task.met_forcing_file, task.sci_config) for task in tasks] == [ + (repos[0], met_site_a, sci_a), + (repos[0], met_site_a, sci_b), + (repos[0], met_site_b, sci_a), + (repos[0], met_site_b, sci_b), + (repos[1], met_site_a, sci_a), + (repos[1], met_site_a, sci_b), + (repos[1], met_site_b, sci_a), + (repos[1], met_site_b, sci_b), ] def test_get_fluxnet_comparisons(): """Tests for `get_fluxnet_comparisons()`.""" + output_dir = MOCK_CWD / internal.SITE_OUTPUT_DIR + # Success case: comparisons for two branches with two tasks # met0_S0_R0 met0_S0_R1 - config = make_barebones_config() - science_configurations = [{"foo": "bar"}] - met_sites = 
["foo.nc"] - tasks = get_fluxnet_tasks(config["realisations"], science_configurations, met_sites) - assert len(tasks) == 2 - comparisons = get_fluxnet_comparisons(tasks) + task_a = Task( + repo=CableRepository("path/to/repo_a", repo_id=0), + met_forcing_file="foo.nc", + sci_config={"foo": "bar"}, + sci_conf_id=0, + ) + task_b = Task( + repo=CableRepository("path/to/repo_b", repo_id=1), + met_forcing_file="foo.nc", + sci_config={"foo": "bar"}, + sci_conf_id=0, + ) + comparisons = get_fluxnet_comparisons([task_a, task_b], root_dir=MOCK_CWD) assert len(comparisons) == 1 - assert all( - (task_a.sci_conf_id, task_a.met_forcing_file) - == (task_b.sci_conf_id, task_b.met_forcing_file) - for task_a, task_b in comparisons + assert comparisons[0].files == ( + output_dir / task_a.get_output_filename(), + output_dir / task_b.get_output_filename(), ) - assert (comparisons[0][0].branch_id, comparisons[0][1].branch_id) == (0, 1) + assert comparisons[0].task_name == "foo_S0_R0_R1" # Success case: comparisons for three branches with three tasks # met0_S0_R0 met0_S0_R1 met0_S0_R2 - config = make_barebones_config() - config["realisations"] += ( - { - "name": "new-branch", - "revision": -1, - "path": "path/to/new-branch", - "patch": {}, - "build_script": "", - }, + task_a = Task( + repo=CableRepository("path/to/repo_a", repo_id=0), + met_forcing_file="foo.nc", + sci_config={"foo": "bar"}, + sci_conf_id=0, + ) + task_b = Task( + repo=CableRepository("path/to/repo_b", repo_id=1), + met_forcing_file="foo.nc", + sci_config={"foo": "bar"}, + sci_conf_id=0, ) - science_configurations = [{"foo": "bar"}] - met_sites = ["foo.nc"] - tasks = get_fluxnet_tasks(config["realisations"], science_configurations, met_sites) - assert len(tasks) == 3 - comparisons = get_fluxnet_comparisons(tasks) + task_c = Task( + repo=CableRepository("path/to/repo_b", repo_id=2), + met_forcing_file="foo.nc", + sci_config={"foo": "bar"}, + sci_conf_id=0, + ) + comparisons = get_fluxnet_comparisons([task_a, task_b, task_c], root_dir=MOCK_CWD) assert len(comparisons) == 3 - assert all( - (task_a.sci_conf_id, task_a.met_forcing_file) - == (task_b.sci_conf_id, task_b.met_forcing_file) - for task_a, task_b in comparisons + assert comparisons[0].files == ( + output_dir / task_a.get_output_filename(), + output_dir / task_b.get_output_filename(), + ) + assert comparisons[1].files == ( + output_dir / task_a.get_output_filename(), + output_dir / task_c.get_output_filename(), + ) + assert comparisons[2].files == ( + output_dir / task_b.get_output_filename(), + output_dir / task_c.get_output_filename(), ) - assert (comparisons[0][0].branch_id, comparisons[0][1].branch_id) == (0, 1) - assert (comparisons[1][0].branch_id, comparisons[1][1].branch_id) == (0, 2) - assert (comparisons[2][0].branch_id, comparisons[2][1].branch_id) == (1, 2) + assert comparisons[0].task_name == "foo_S0_R0_R1" + assert comparisons[1].task_name == "foo_S0_R0_R2" + assert comparisons[2].task_name == "foo_S0_R1_R2" def test_get_comparison_name(): """Tests for `get_comparison_name()`.""" - # Success case: check comparison name convention - task_a = Task(0, "branch-a", {}, "foo", 0, {}) - task_b = Task(1, "branch-b", {}, "foo", 0, {}) - assert get_comparison_name(task_a, task_b) == "foo_S0_R0_R1" - - -def test_run_comparison(): - """Tests for `run_comparison()`.""" - - bitwise_cmp_dir = MOCK_CWD / internal.SITE_BITWISE_CMP_DIR - bitwise_cmp_dir.mkdir(parents=True) - output_dir = MOCK_CWD / internal.SITE_OUTPUT_DIR - task_a = Task(0, "branch-a", {}, "foo", 0, {}) - task_b = Task(1, 
"branch-b", {}, "foo", 0, {}) - - # Success case: run comparison - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - run_comparison(task_a, task_b) - mock_run_cmd.assert_called_once_with( - f"nccmp -df {output_dir / task_a.get_output_filename()} " - f"{output_dir / task_b.get_output_filename()}", - capture_output=True, - verbose=False, - ) - - # Success case: run comparison with verbose enabled - # TODO(Sean): this test should be removed once we use the logging module - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - run_comparison(task_a, task_b, verbose=True) - mock_run_cmd.assert_called_once_with( - f"nccmp -df {output_dir / task_a.get_output_filename()} " - f"{output_dir / task_b.get_output_filename()}", - capture_output=True, - verbose=True, - ) - - # Success case: test non-verbose output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - run_comparison(task_a, task_b) - assert buf.getvalue() == ( - f"Success: files {task_a.get_output_filename()} " - f"{task_b.get_output_filename()} are identical\n" - ) - - # Success case: test verbose output - with unittest.mock.patch("benchcab.utils.subprocess.run_cmd"): - with contextlib.redirect_stdout(io.StringIO()) as buf: - run_comparison(task_a, task_b, verbose=True) - assert buf.getvalue() == ( - f"Comparing files {task_a.get_output_filename()} and " - f"{task_b.get_output_filename()} bitwise...\n" - f"Success: files {task_a.get_output_filename()} " - f"{task_b.get_output_filename()} are identical\n" - ) - - with unittest.mock.patch( - "benchcab.utils.subprocess.run_cmd", autospec=True - ) as mock_run_cmd: - mock_run_cmd.side_effect = subprocess.CalledProcessError( - returncode=1, cmd="dummy cmd", output="error message from command" - ) - # Failure case: test failed comparison check (files differ) - run_comparison(task_a, task_b) - stdout_file = bitwise_cmp_dir / f"{get_comparison_name(task_a, task_b)}.txt" - with open(stdout_file, "r", encoding="utf-8") as file: - assert file.read() == "error message from command" - - # Failure case: test non-verbose standard output on failure - with contextlib.redirect_stdout(io.StringIO()) as buf: - run_comparison(task_a, task_b) - stdout_file = bitwise_cmp_dir / f"{get_comparison_name(task_a, task_b)}.txt" - assert buf.getvalue() == ( - f"Failure: files {task_a.get_output_filename()} " - f"{task_b.get_output_filename()} differ. Results of diff " - f"have been written to {stdout_file}\n" - ) - - # Failure case: test verbose standard output on failure - with contextlib.redirect_stdout(io.StringIO()) as buf: - run_comparison(task_a, task_b, verbose=True) - stdout_file = bitwise_cmp_dir / f"{get_comparison_name(task_a, task_b)}.txt" - assert buf.getvalue() == ( - f"Comparing files {task_a.get_output_filename()} and " - f"{task_b.get_output_filename()} bitwise...\n" - f"Failure: files {task_a.get_output_filename()} " - f"{task_b.get_output_filename()} differ. Results of diff " - f"have been written to {stdout_file}\n" + # Success case: check comparison name convention + assert ( + get_comparison_name( + CableRepository("path/to/repo", repo_id=0), + CableRepository("path/to/repo", repo_id=1), + met_forcing_file="foo.nc", + sci_conf_id=0, ) + == "foo_S0_R0_R1" + )