Skip to content

Commit

Permalink
Merge pull request #480 from ACCESS-NRI/441-inconsistency-between-yam…
Browse files Browse the repository at this point in the history
…l-libs-bugfix

Add warnings for duplicate keys in config.yaml files
  • Loading branch information
jo-basevi authored Aug 20, 2024
2 parents 8aa701e + cddbef6 commit 39b9ba4
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 5 deletions.
16 changes: 13 additions & 3 deletions payu/branch.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from typing import Optional
import shutil

from ruamel.yaml import YAML, CommentedMap
from ruamel.yaml import YAML, CommentedMap, constructor
import git

from payu.fsops import read_config, DEFAULT_CONFIG_FNAME, list_archive_dirs
Expand Down Expand Up @@ -67,8 +67,18 @@ def add_restart_to_config(restart_path: Path, config_path: Path) -> None:
restart in archive"""

# Default ruamel yaml preserves comments and multiline strings
yaml = YAML()
config = yaml.load(config_path)
try:
yaml = YAML()
config = yaml.load(config_path)
except constructor.DuplicateKeyError as e:
# PyYaml which is used to read config allows duplicate keys
# These will get removed when the configuration file is modified
warnings.warn(
"Removing any subsequent duplicate keys from config.yaml"
)
yaml = YAML()
yaml.allow_duplicate_keys = True
config = yaml.load(config_path)

# Add in restart path
config['restart'] = str(restart_path)
Expand Down
10 changes: 9 additions & 1 deletion payu/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import shlex
import subprocess
import sys
import warnings

import payu
import payu.envmod as envmod
Expand All @@ -23,10 +24,17 @@
from payu.schedulers import index as scheduler_index
import payu.subcommands


# Default configuration
DEFAULT_CONFIG = 'config.yaml'

# Force warnings.warn() to omit the source code line in the message
formatwarning_orig = warnings.formatwarning
warnings.formatwarning = (
lambda message, category, filename, lineno, line=None: (
formatwarning_orig(message, category, filename, lineno, line='')
)
)


def parse():
"""Parse the command line inputs and execute the subcommand."""
Expand Down
24 changes: 23 additions & 1 deletion payu/fsops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import shlex
import subprocess
from typing import Union, List
import warnings

# Extensions
import yaml
Expand Down Expand Up @@ -62,6 +63,27 @@ def movetree(src, dst, symlinks=False):
shutil.rmtree(src)


class DuplicateKeyWarnLoader(yaml.SafeLoader):
def construct_mapping(self, node, deep=False):
"""Add warning for duplicate keys in yaml file, as currently
PyYAML overwrites duplicate keys even though in YAML, keys
are meant to be unique
"""
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
value = self.construct_object(value_node, deep=deep)
if key in mapping:
warnings.warn(
"Duplicate key found in config.yaml: "
f"key '{key}' with value '{value}'. "
f"This overwrites the original value: '{mapping[key]}'"
)
mapping[key] = value

return super().construct_mapping(node, deep)


def read_config(config_fname=None):
"""Parse input configuration file and return a config dict."""

Expand All @@ -70,7 +92,7 @@ def read_config(config_fname=None):

try:
with open(config_fname, 'r') as config_file:
config = yaml.safe_load(config_file)
config = yaml.load(config_file, Loader=DuplicateKeyWarnLoader)

# NOTE: A YAML file with no content returns `None`
if config is None:
Expand Down
32 changes: 32 additions & 0 deletions test/test_payu.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from io import StringIO
import os
from pathlib import Path
import payu.branch
import pytest
import shutil
import stat
Expand Down Expand Up @@ -432,3 +433,34 @@ def test_run_userscript_command(tmp_path):
])
def test_needs_shell(command, expected):
assert payu.fsops.needs_subprocess_shell(command) == expected


def test_read_config_yaml_duplicate_key():
"""The PyYAML library is used for reading config.yaml, but use ruamel yaml
is used in when modifying config.yaml as part of payu checkout
(ruamel is used to preserve comments and multi-line strings).
This led to bug #441, where pyyaml allowed duplicate keys but
ruamel.library raises an error
"""
# Create a yaml file with a duplicate key
config_content = """
pbs_flags: value1
pbs_flags: value2
"""
config_path = tmpdir / "config.yaml"
with config_path.open("w") as file:
file.write(config_content)

# Test read config passes without an error but a warning is raised
warn_msg = "Duplicate key found in config.yaml: key 'pbs_flags' with "
warn_msg += "value 'value2'. This overwrites the original value: 'value1'"
with pytest.warns(UserWarning, match=warn_msg):
payu.fsops.read_config(config_path)

restart_path = tmpdir / "restarts"

# Test add restart to config.yaml does not fail with an error, but
# still raises another warning that duplicates keys will be deleted
warn_msg = "Removing any subsequent duplicate keys from config.yaml"
with pytest.warns(UserWarning, match=warn_msg):
payu.branch.add_restart_to_config(restart_path, config_path)

0 comments on commit 39b9ba4

Please sign in to comment.