diff --git a/payu/models/cesm_cmeps.py b/payu/models/cesm_cmeps.py
index 43432334..d9d9d41c 100644
--- a/payu/models/cesm_cmeps.py
+++ b/payu/models/cesm_cmeps.py
@@ -12,16 +12,17 @@
 
 import os
 import re
-import errno
-import glob
 import shutil
-import multiprocessing
+from warnings import warn
 
 from payu.fsops import mkdir_p, make_symlink
 from payu.models.model import Model
 from payu.models.fms import fms_collate
 from payu.models.mom6 import mom6_add_parameter_files
 
+NUOPC_CONFIG = "nuopc.runconfig"
+NUOPC_RUNSEQ = "nuopc.runseq"
+
 # Add as needed
 component_info = {
     "mom": {
@@ -75,8 +76,8 @@ def __init__(self, expt, name, config):
         self.config_files = [
             "drv_in",
             "fd.yaml",
-            "nuopc.runconfig",
-            "nuopc.runseq"
+            NUOPC_CONFIG,
+            NUOPC_RUNSEQ
         ]
 
         self.realms = ["ocn", "ice", "wav", "atm", "rof", "cpl"]
@@ -85,7 +86,7 @@ def __init__(self, expt, name, config):
         self.rpointers = [] # To be inferred from nuopc.runconfig
 
     def get_runconfig(self, path):
-        self.runconfig = Runconfig(os.path.join(path, 'nuopc.runconfig'))
+        self.runconfig = Runconfig(os.path.join(path, NUOPC_CONFIG))
 
     def get_components(self):
         """Get components from nuopc.runconfig"""
@@ -156,15 +157,9 @@ def setup(self):
 
         self.runconfig.set("ALLCOMP_attributes", "start_type", start_type)
 
-        # Check pelayout makes sense
-        all_realms = self.realms + ["glc", "lnd"]
-        cpucount = int(
-            self.expt.config.get('ncpus', multiprocessing.cpu_count())
-        )
-        for realm in all_realms:
-            ntasks = int(self.runconfig.get("PELAYOUT_attributes", f"{realm}_ntasks"))
-            assert cpucount >= ntasks, "Insufficient cpus for the pelayout in nuopc.runconfig"
-
+        # run checks on nuopc.runconfig
+        self._setup_checks()
+
         # Ensure that restarts will be written at the end of each run
         stop_n = self.runconfig.get("CLOCK_attributes", "stop_n")
         stop_option = self.runconfig.get("CLOCK_attributes", "stop_option")
@@ -174,7 +169,7 @@ def setup(self):
         mkdir_p(os.path.join(self.work_path, 'log'))
         mkdir_p(os.path.join(self.work_path, 'timing'))
 
-        self.runconfig.write(os.path.join(self.work_path, 'nuopc.runconfig'))
+        self.runconfig.write(os.path.join(self.work_path, NUOPC_CONFIG))
 
         # Horrible hack to make a link to the mod_def.ww3 input in the work
         # directory
@@ -190,6 +185,100 @@ def setup(self):
                 # TODO: copied this from other models. Surely we want to exit here or something
                 print('payu: error: Unable to find mod_def.ww3 file in input directory')
 
+    def _setup_checks(self):
+        """Check the PE and PIO layouts in nuopc.runconfig are consistent.
+
+        For each realm in ``self.realms`` the PE layout must fit within the
+        experiment's ``ncpus``, and for realms present in ``component_list``
+        the PIO root/tasks/stride must stay within that realm's PEs.
+
+        Returns True if every check passes; raises ValueError on an invalid
+        layout or iotype.
+        """
+        # check pelayout fits within requested cpucount
+        cpucount = int(self.expt.config.get('ncpus'))
+        all_realms = self.realms
+        for realm in all_realms:
+            ntasks = int(self.runconfig.get("PELAYOUT_attributes", f"{realm}_ntasks"))
+            nthreads = int(self.runconfig.get("PELAYOUT_attributes", f"{realm}_nthreads"))
+            rootpe = int(self.runconfig.get("PELAYOUT_attributes", f"{realm}_rootpe"))
+            pestride = int(self.runconfig.get("PELAYOUT_attributes", f"{realm}_pestride"))
+
+            if nthreads < 1:
+                raise ValueError(f"The number of {realm}_nthreads ({nthreads}) in "
+                                 f"{NUOPC_CONFIG} must be at least 1.")
+            if nthreads > 1:
+                npes = nthreads*ntasks*pestride
+                # this is taken from
+                # https://github.com/ESCOMP/CMEPS/blob/5b7d76978e2fdc661ec2de4ba9834b985decadc6/cesm/driver/esm.F90#L1007
+                # the correct calculation might be (ntasks-1)*pestride*nthreads + nthreads
+            else:
+                npes = (ntasks-1)*pestride + 1
+
+            if (rootpe + npes) > cpucount:
+                raise ValueError(
+                    f"Insufficient cpus for the {realm} pelayout in {NUOPC_CONFIG}"
+                )
+
+            # check iolayout
+            if realm == "cpl" or realm == "med":
+                comp = "MED"  # med and cpl names are both used in runconfig
+            else:
+                comp = realm.upper()
+
+            if comp in self.runconfig.get_component_list():
+                io_section = f"{comp}_modelio"
+                nc_type = self.runconfig.get(io_section, "pio_typename")
+                ioroot = int(self.runconfig.get(io_section, "pio_root"))
+
+                if ioroot >= npes:
+                    raise ValueError(
+                        f"{io_section} pio_root exceeds available PEs (max: {npes - 1}) "
+                        f"in {NUOPC_CONFIG}."
+                    )
+
+                pio_async = self.runconfig.get(io_section, "pio_async_interface")
+                if pio_async == ".true.":
+                    warn(
+                        "Payu does not do consistency checks for asynchronous pio, as "
+                        f"set in {io_section} of {NUOPC_CONFIG}. Consider adding them"
+                    )
+                elif pio_async == ".false.":
+                    match nc_type:
+                        case "netcdf":
+                            # nothing further to check for this iotype; use
+                            # `pass` (not `break`) so later realms are checked
+                            pass
+                        case "netcdf4p" | "pnetcdf":
+                            niotasks = int(self.runconfig.get(io_section, "pio_numiotasks"))
+                            iostride = int(self.runconfig.get(io_section, "pio_stride"))
+                            if (niotasks <= 0):
+                                warn(f"The pio_numiotasks for {io_section} in {NUOPC_CONFIG} is "
+                                     "not set, using model default")
+                            if (iostride <= 0):
+                                warn(f"The pio_stride for {io_section} in {NUOPC_CONFIG} is "
+                                     "not set, using model default")
+                            if (all([
+                                niotasks > 0,
+                                iostride > 0,
+                                (ioroot + (niotasks-1)*iostride) >= npes
+                            ])):
+                                raise ValueError(
+                                    f"The iolayout for {io_section} in {NUOPC_CONFIG} is "
+                                    "requesting out of range cpus"
+                                )
+                        case "netcdf4c":
+                            raise ValueError(
+                                f"netcdf4c in {io_section} of {NUOPC_CONFIG} is deprecated, "
+                                "use netcdf4p"
+                            )
+                        case _:
+                            raise ValueError(
+                                f"The iotype for {io_section} in {NUOPC_CONFIG} is "
+                                'invalid, valid options: "netcdf", "pnetcdf", "netcdf4p"'
+                            )
+        return True
+
     def archive(self):
         super().archive()
 
@@ -313,6 +402,21 @@ def get(self, section, variable, value=None):
         else:
             return value
 
+    def get_component_list(self):
+        """
+        Get the `component_list`
+        """
+        m = re.search(
+            r'component_list:\s*(.*)',
+            self.contents
+        )
+
+        if m is not None:
+            components_str = m.group(1).strip()
+            return components_str.split()
+        else:
+            return None
+
     def set(self, section, variable, new_value):
         """
         Overwrite the value of any existing variable
diff --git a/test/models/access-om3/test_access_om3.py b/test/models/access-om3/test_access_om3.py
new file mode 100644
index 00000000..67a3b179
--- /dev/null
+++ b/test/models/access-om3/test_access_om3.py
@@ -0,0 +1,314 @@
+import copy
+import os
+import shutil
+from pathlib import Path
+import pytest
+
+import payu
+
+from test.common import cd, tmpdir, ctrldir, labdir, workdir, write_config, config_path
+from test.common import config as config_orig
+from test.common import make_inputs, make_exe
+
+MODEL = 'access-om3'
+
+
+def setup_module(module):
+    """
+    Put any test-wide setup code in here, e.g. creating test files
+    """
+
+    # Should be taken care of by teardown, in case remnants lying around
+    try:
+        shutil.rmtree(tmpdir)
+    except FileNotFoundError:
+        pass
+
+    try:
+        tmpdir.mkdir()
+        labdir.mkdir()
+        ctrldir.mkdir()
+        workdir.mkdir()
+        # archive_dir.mkdir()
+        make_inputs()
+        make_exe()
+    except Exception as e:
+        print(e)
+
+
+def teardown_module(module):
+    """
+    Put any test-wide teardown code in here, e.g. removing test outputs
+    """
+
+    try:
+        shutil.rmtree(tmpdir)
+        print('removing tmp')
+    except Exception as e:
+        print(e)
+
+
+def cmeps_config(ncpu):
+    # Create a config.yaml and nuopc.runconfig file
+
+    config = copy.deepcopy(config_orig)
+    config['model'] = MODEL
+    config['ncpus'] = ncpu
+
+    write_config(config)
+
+    with open(os.path.join(ctrldir, 'nuopc.runconfig'), "w") as f:
+        f.close()
+
+
+def teardown_cmeps_config():
+    # Teardown
+    os.remove(config_path)
+
+
+# Mock runconfig for some tests
+# valid minimum nuopc.runconfig for _setup_checks
+MOCK_IO_RUNCONF = {
+    "PELAYOUT_attributes": dict(
+        moc_ntasks=1,
+        moc_nthreads=1,
+        moc_pestride=1,
+        moc_rootpe=0
+    ),
+    "MOC_modelio": dict(
+        pio_numiotasks=1,
+        pio_rearranger=1,
+        pio_root=0,
+        pio_stride=1,
+        pio_typename='netcdf4p',
+        pio_async_interface='.false.'
+    )
+}
+
+
+class MockRunConfig:
+
+    def __init__(self, config):
+        self.conf = config
+
+    def get_component_list(self):
+        return ['MOC']
+
+    def get(self, section, variable, value=None):
+        return self.conf[section][variable]
+
+
+@pytest.mark.parametrize("ncpu, moc_ntasks, moc_nthreads, moc_pestride, moc_rootpe", [
+    (1, 1, 1, 1, 0),  # min
+    (4, 4, 1, 1, 0),  # min tasks
+    (4, 2, 2, 1, 0),  # min tasks * threads
+    (4, 2, 1, 1, 2),  # min tasks + rootpe
+    (4, 1, 2, 2, 0),  # min threads * rootpe
+    (4, 1, 1, 1, 3),  # max rootpe
+    (5, 2, 1, 4, 0),  # max stride
+    (13, 4, 1, 3, 1),  # odd ncpu
+    (13, 2, 3, 2, 1),  # odd ncpu
+    (100000, 50000, 1, 2, 0),  # max cpu
+    (100000, 1, 1, 1, 99999),  # max cpu
+    ])
+@pytest.mark.filterwarnings("error")
+def test__setup_checks_npes(ncpu, moc_ntasks, moc_nthreads, moc_pestride, moc_rootpe):
+
+    cmeps_config(ncpu)
+
+    test_runconf = copy.deepcopy(MOCK_IO_RUNCONF)
+    test_runconf["PELAYOUT_attributes"].update({
+        "moc_ntasks": moc_ntasks,
+        "moc_nthreads": moc_nthreads,
+        "moc_pestride": moc_pestride,
+        "moc_rootpe": moc_rootpe
+    })
+
+    with cd(ctrldir):
+        lab = payu.laboratory.Laboratory(lab_path=str(labdir))
+        expt = payu.experiment.Experiment(lab, reproduce=False)
+        model = expt.models[0]
+
+    model.realms = ["moc"]
+
+    model.runconfig = MockRunConfig(test_runconf)
+
+    model._setup_checks()
+
+    teardown_cmeps_config()
+
+
+@pytest.mark.parametrize("ncpu, moc_ntasks, moc_nthreads, moc_pestride, moc_rootpe", [
+    (1, 1, 1, 1, 1),  # min
+    (4, 5, 1, 1, 0),  # min tasks
+    (4, 1, 2, 2, 1),  # min tasks * threads
+    (2, 1, 2, 1, 1),  # threads > strides
+    (4, 1, 3, 1, 2),  # min threads + rootpe
+    (4, 1, 1, 1, 4),  # max rootpe
+    (13, 4, 1, 4, 1),  # odd ncpu
+    (13, 2, 7, 7, 0),  # odd ncpu
+    (100000, 50001, 1, 2, 0),  # max cpu
+    (100000, 1, 1, 1, 100000),  # max cpu
+    ])
+def test__setup_checks_too_many_pes(ncpu, moc_ntasks, moc_nthreads, moc_pestride, moc_rootpe):
+
+    cmeps_config(ncpu)
+
+    test_runconf = copy.deepcopy(MOCK_IO_RUNCONF)
+    test_runconf["PELAYOUT_attributes"].update({
+        "moc_ntasks": moc_ntasks,
+        "moc_nthreads": moc_nthreads,
+        "moc_pestride": moc_pestride,
+        "moc_rootpe": moc_rootpe
+    })
+
+    with cd(ctrldir):
+        lab = payu.laboratory.Laboratory(lab_path=str(labdir))
+        expt = payu.experiment.Experiment(lab, reproduce=False)
+        model = expt.models[0]
+
+    model.realms = ["moc"]
+
+    model.runconfig = MockRunConfig(test_runconf)
+
+    with pytest.raises(ValueError):
+        model._setup_checks()
+
+    teardown_cmeps_config()
+
+
+@pytest.mark.parametrize("ncpu, pio_numiotasks, pio_stride, pio_root, pio_typename", [
+    (1, 1, 1, 0, "netcdf"),  # min
+    (2, 1, 1, 1, "netcdf"),  # max root
+    (2, 2, 1, 0, "netcdf4p"),  # min tasks + rootpe
+    (2, 1, 1, 1, "netcdf4p"),  # max rootpe
+    (5, 3, 2, 0, "netcdf4p"),
+    (100000, 50001, 1, 2, "netcdf4p"),  # odd ncpu
+    ])
+@pytest.mark.filterwarnings("error")
+def test__setup_checks_io(ncpu, pio_numiotasks, pio_stride, pio_root, pio_typename):
+
+    cmeps_config(ncpu)
+
+    test_runconf = copy.deepcopy(MOCK_IO_RUNCONF)
+    test_runconf["PELAYOUT_attributes"].update({
+        "moc_ntasks": ncpu
+    })
+    test_runconf["MOC_modelio"].update(dict(
+        pio_numiotasks=pio_numiotasks,
+        pio_root=pio_root,
+        pio_stride=pio_stride,
+        pio_typename=pio_typename,
+    ))
+
+    with cd(ctrldir):
+        lab = payu.laboratory.Laboratory(lab_path=str(labdir))
+        expt = payu.experiment.Experiment(lab, reproduce=False)
+        model = expt.models[0]
+
+    model.realms = ["moc"]
+
+    model.runconfig = MockRunConfig(test_runconf)
+
+    model._setup_checks()
+
+    teardown_cmeps_config()
+
+
+@pytest.mark.parametrize("ncpu, pio_numiotasks, pio_stride, pio_root, pio_typename", [
+    (1, 1, 1, 0, "netcdf4c"),
+    (2, 1, 1, 2, "netcdf"),  # root too big
+    (2, 3, 1, 0, "netcdf4p"),  # too many tasks
+    (2, 2, 2, 0, "netcdf4p"),  # stride too big
+    (5, 2, 2, 3, "netcdf4p"),  # stride too big
+    (100000, 50000, 2, 2, "netcdf4p"),  # odd ncpu
+    ])
+def test__setup_checks_bad_io(ncpu, pio_numiotasks, pio_stride, pio_root, pio_typename):
+    cmeps_config(ncpu)
+
+    test_runconf = copy.deepcopy(MOCK_IO_RUNCONF)
+    test_runconf["PELAYOUT_attributes"].update({
+        "moc_ntasks": ncpu
+    })
+    test_runconf["MOC_modelio"].update(dict(
+        pio_numiotasks=pio_numiotasks,
+        pio_root=pio_root,
+        pio_stride=pio_stride,
+        pio_typename=pio_typename,
+    ))
+
+    with cd(ctrldir):
+        lab = payu.laboratory.Laboratory(lab_path=str(labdir))
+        expt = payu.experiment.Experiment(lab, reproduce=False)
+        model = expt.models[0]
+
+    model.realms = ["moc"]
+
+    model.runconfig = MockRunConfig(test_runconf)
+
+    with pytest.raises(ValueError):
+        model._setup_checks()
+
+    teardown_cmeps_config()
+
+
+@pytest.mark.parametrize("pio_typename, pio_async_interface", [
+    ("netcdf4p", ".true."),
+    ("pnetcdf", ".true."),
+    ("netcdf", ".true."),
+    ])
+def test__setup_checks_pio_async(pio_typename, pio_async_interface):
+
+    cmeps_config(1)
+
+    test_runconf = copy.deepcopy(MOCK_IO_RUNCONF)
+    test_runconf["MOC_modelio"].update(dict(
+        pio_async_interface=pio_async_interface,
+        pio_typename=pio_typename,
+    ))
+
+    with cd(ctrldir):
+        lab = payu.laboratory.Laboratory(lab_path=str(labdir))
+        expt = payu.experiment.Experiment(lab, reproduce=False)
+        model = expt.models[0]
+
+    model.realms = ["moc"]
+
+    model.runconfig = MockRunConfig(test_runconf)
+
+    with pytest.warns(
+        Warning, match="does not do consistency checks for asynchronous pio"
+    ):
+        model._setup_checks()
+
+    teardown_cmeps_config()
+
+
+@pytest.mark.parametrize("pio_numiotasks, pio_stride", [
+    (1, -99),
+    (-99, 1),
+    ])
+def test__setup_checks_io_model_defaults(pio_numiotasks, pio_stride):
+    cmeps_config(1)
+
+    test_runconf = copy.deepcopy(MOCK_IO_RUNCONF)
+    test_runconf["MOC_modelio"].update(dict(
+        pio_numiotasks=pio_numiotasks,
+        pio_stride=pio_stride,
+    ))
+
+    with cd(ctrldir):
+        lab = payu.laboratory.Laboratory(lab_path=str(labdir))
+        expt = payu.experiment.Experiment(lab, reproduce=False)
+        model = expt.models[0]
+
+    model.realms = ["moc"]
+
+    model.runconfig = MockRunConfig(test_runconf)
+
+    with pytest.warns(
+        Warning, match="using model default"
+    ):
+        model._setup_checks()
+
+    teardown_cmeps_config()
diff --git a/test/models/test_cesm_cmeps.py b/test/models/access-om3/test_runconfig.py
similarity index 68%
rename from test/models/test_cesm_cmeps.py
rename to test/models/access-om3/test_runconfig.py
index b8fe28f9..8af1f12c 100644
--- a/test/models/test_cesm_cmeps.py
+++ b/test/models/access-om3/test_runconfig.py
@@ -1,8 +1,23 @@
 import os
 import pytest
+import shutil
+from test.common import tmpdir
 
 from payu.models.cesm_cmeps import Runconfig
 
+
+@pytest.fixture()
+def runconfig_path():
+    return os.path.join('test', 'resources', 'nuopc.runconfig')
+
+
+@pytest.fixture()
+def runconfig(runconfig_path):
+    return Runconfig(runconfig_path)
+
+
+# Runconfig tests:
+
 @pytest.mark.parametrize(
     "section, variable, expected",
     [
@@ -17,20 +32,22 @@
         ("MED_attributes", "histaux_atm2med_file1_flds", "Faxa_swndr:Faxa_swvdr:Faxa_swndf:Faxa_swvdf"), # correctly read long colon separated value
     ]
 )
-def test_runconfig_get(section, variable, expected):
+def test_runconfig_get(section, variable, expected, runconfig):
     """Test getting values from a nuopc.runconfig file"""
-    runconfig_path = os.path.join('test', 'resources', 'nuopc.runconfig')
-    runconfig = Runconfig(runconfig_path)
-
     assert runconfig.get(section, variable) == expected
 
-def test_runconfig_get_default():
-    """Test getting default values from a nuopc.runconfig file"""
-    runconfig_path = os.path.join('test', 'resources', 'nuopc.runconfig')
-    runconfig = Runconfig(runconfig_path)
+def test_runconfig_get_default(runconfig):
+    """Test getting default values from a nuopc.runconfig file"""
 
     assert runconfig.get("DOES_NOT_EXIST", "DOES_NOT_EXIST", value="default") == "default"
 
+
+def test_runconfig_get_component_list(runconfig):
+    """Test getting component_list from a nuopc.runconfig file"""
+    COMP_LIST = ['MED', 'ATM', 'ICE', 'OCN', 'ROF']
+    assert runconfig.get_component_list() == COMP_LIST
+
+
 @pytest.mark.parametrize(
     "section, variable, new_variable",
     [
@@ -38,20 +55,15 @@ def test_runconfig_get_default():
         ("CLOCK_attributes", "restart_n", "2"),
     ]
 )
-def test_runconfig_set(section, variable, new_variable):
+def test_runconfig_set(section, variable, new_variable, runconfig):
     """Test setting values in a nuopc.runconfig file"""
-    runconfig_path = os.path.join('test', 'resources', 'nuopc.runconfig')
-    runconfig = Runconfig(runconfig_path)
-
     runconfig.set(section, variable, new_variable)
 
     assert runconfig.get(section, variable) == new_variable
 
-def test_runconfig_set_error():
-    """Test error setting values in a nuopc.runconfig file that don't exist"""
-    runconfig_path = os.path.join('test', 'resources', 'nuopc.runconfig')
-    runconfig = Runconfig(runconfig_path)
+def test_runconfig_set_error(runconfig):
+    """Test error setting values in a nuopc.runconfig file that don't exist"""
 
     with pytest.raises(
         NotImplementedError,
         match='Cannot set value of variable that does not already exist'
@@ -59,19 +71,22 @@ def test_runconfig_set_error():
         runconfig.set("DOES_NOT_EXIST", "OCN_model", "value")
         runconfig.set("ALLCOMP_attributes", "DOES_NOT_EXIST", "value")
 
-def test_runconfig_set_write_get():
+
+def test_runconfig_set_write_get(runconfig):
     """Test updating the values in a nuopc.runconfig file"""
-    runconfig_path = os.path.join('test', 'resources', 'nuopc.runconfig')
-    runconfig = Runconfig(runconfig_path)
+
+    tmpdir.mkdir()
 
     assert runconfig.get("CLOCK_attributes", "restart_n") == "1"
 
     runconfig.set("CLOCK_attributes", "restart_n", "2")
 
-    runconfig_path_tmp = "nuopc.runconfig.tmp"
-    runconfig.write(runconfig_path_tmp)
+    runconfig_path_tmp = os.path.join(tmpdir, "nuopc.runconfig.tmp")
+    runconfig.write(file=runconfig_path_tmp)
 
     runconfig_updated = Runconfig(runconfig_path_tmp)
-    assert runconfig.get("CLOCK_attributes", "restart_n") == "2"
+    assert runconfig_updated.get("CLOCK_attributes", "restart_n") == "2"
 
     os.remove(runconfig_path_tmp)
+
+    shutil.rmtree(tmpdir)