diff --git a/semantic-conventions/CHANGELOG.md b/semantic-conventions/CHANGELOG.md index 49ccc6f3..42a3ecc0 100644 --- a/semantic-conventions/CHANGELOG.md +++ b/semantic-conventions/CHANGELOG.md @@ -32,6 +32,10 @@ Please update the changelog as part of any significant pull request. ([#266](https://github.com/open-telemetry/build-tools/pull/266)) - Sort attribute tables by requirement level and attribute name ([#260](https://github.com/open-telemetry/build-tools/pull/260)) +- Support suppressing all validation errors via flag that allows to + parse previous versions of semantic conventions for backward compatibility checks + and use code generation improvements on older semantic convention version. + ([#300](https://github.com/open-telemetry/build-tools/pull/300)) ## v0.23.0 diff --git a/semantic-conventions/README.md b/semantic-conventions/README.md index 4be014a2..81f91a23 100644 --- a/semantic-conventions/README.md +++ b/semantic-conventions/README.md @@ -123,6 +123,12 @@ The following checks are performed: This check does not take into account opt-in attributes. Adding new attributes to metric is not always breaking, so it's considered non-critical and it's possible to suppress it with `--ignore-warnings` +Previous versions of semantic conventions are not always compatible with newer versions of build-tools. You can suppress validation errors by adding `--continue-on-validation-errors` flag: + +```bash +docker run --rm otel/semconvgen --yaml-root {yaml_folder} --continue-on-validation-errors compatibility --previous-version {semconv version} +``` + ## Code Generator The image supports [Jinja](https://jinja.palletsprojects.com/en/2.11.x/) templates to generate code from the models. @@ -171,13 +177,13 @@ Finally, additional value can be passed to the template in form of `key=value` p comma using the `--parameters [{key=value},]+` or `-D` flag. Generating code from older versions of semantic conventions with new tooling is, in general, not supported. -However in some cases minor incompatibilities in semantic conventions can be ignored by setting `--strict-validation` flag to `false` +However in some cases minor incompatibilities in semantic conventions can be suppressed by adding `--continue-on-validation-errors` flag: ```bash docker run --rm \ otel/semconvgen:$GENERATOR_VERSION \ --yaml-root /source \ - `--strict-validation false` + --continue-on-validation-errors \ code \ ...other parameters... ``` diff --git a/semantic-conventions/src/opentelemetry/semconv/main.py b/semantic-conventions/src/opentelemetry/semconv/main.py index aa1a3eea..fe5a5aea 100644 --- a/semantic-conventions/src/opentelemetry/semconv/main.py +++ b/semantic-conventions/src/opentelemetry/semconv/main.py @@ -28,6 +28,7 @@ CONVENTION_CLS_BY_GROUP_TYPE, SemanticConventionSet, ) +from opentelemetry.semconv.model.utils import ValidationContext from opentelemetry.semconv.templating.code import CodeRenderer from opentelemetry.semconv.templating.compatibility import CompatibilityChecker from opentelemetry.semconv.templating.markdown import MarkdownRenderer @@ -42,7 +43,7 @@ def parse_semconv( for file in sorted(files): if not file.endswith(".yaml") and not file.endswith(".yml"): parser.error(f"{file} is not a yaml file.") - semconv.parse(file, strict_validation) + semconv.parse(file, ValidationContext(file, strict_validation)) semconv.finish() if semconv.has_error(): sys.exit(1) @@ -73,7 +74,11 @@ def main(): args = parser.parse_args() check_args(args, parser) semconv = parse_semconv( - args.yaml_root, args.exclude, args.debug, args.strict_validation, parser + args.yaml_root, + args.exclude, + args.debug, + not args.continue_on_validation_errors, + parser, ) semconv_filter = parse_only_filter(args.only, parser) filter_semconv(semconv, semconv_filter) @@ -93,9 +98,9 @@ def main(): def process_markdown(semconv, args): options = MarkdownOptions( check_only=args.md_check, - disable_stable_badge=args.md_disable_stable, - disable_experimental_badge=args.md_disable_experimental, - disable_deprecated_badge=args.md_disable_deprecated, + disable_stable_badge=args.md_disable_stable_badge, + disable_experimental_badge=args.md_disable_experimental_badge, + disable_deprecated_badge=args.md_disable_deprecated_badge, break_count=args.md_break_conditional, exclude_files=exclude_file_list(args.markdown_root, args.exclude), ) @@ -106,7 +111,11 @@ def process_markdown(semconv, args): def check_compatibility(semconv, args, parser): prev_semconv_path = download_previous_version(args.previous_version) prev_semconv = parse_semconv( - prev_semconv_path, args.exclude, args.debug, args.strict_validation, parser + prev_semconv_path, + args.exclude, + args.debug, + not args.continue_on_validation_errors, + parser, ) compatibility_checker = CompatibilityChecker(semconv, prev_semconv) problems = compatibility_checker.check() @@ -220,12 +229,6 @@ def add_md_parser(subparsers): required=False, action="store_true", ) - parser.add_argument( - "--check-compat", - help="Check backward compatibility with previous version of semantic conventions.", - type=str, - required=False, - ) parser.add_argument( "--md-disable-stable-badge", help="Removes badges from attributes marked as stable.", @@ -306,11 +309,14 @@ def setup_parser(): help="YAML file containing a Semantic Convention", ) parser.add_argument( - "--strict-validation", - help="Fail on non-critical yaml validation issues.", + "--continue-on-validation-errors", + help="""Continue parsing on yaml validation issues. + Should not be used to generate or validate semantic conventions. + Useful when running backward compatibility checks or using newer + tooling version to generate code for released semantic conventions.""", required=False, - default=True, - action="store_false", + default=False, + action="store_true", ) subparsers = parser.add_subparsers(dest="flavor") add_code_parser(subparsers) diff --git a/semantic-conventions/src/opentelemetry/semconv/model/constraints.py b/semantic-conventions/src/opentelemetry/semconv/model/constraints.py index 9acf8c32..a483928d 100644 --- a/semantic-conventions/src/opentelemetry/semconv/model/constraints.py +++ b/semantic-conventions/src/opentelemetry/semconv/model/constraints.py @@ -17,7 +17,6 @@ from ruamel.yaml.comments import CommentedSeq -from opentelemetry.semconv.model.exceptions import ValidationError from opentelemetry.semconv.model.semantic_attribute import SemanticAttribute from opentelemetry.semconv.model.utils import validate_values @@ -64,20 +63,20 @@ class Include: semconv_id: str -def parse_constraints(yaml_constraints): +def parse_constraints(yaml_constraints, validation_ctx): """This method parses the yaml representation for semantic convention attributes creating a list of Constraint objects. """ constraints = () allowed_keys = ("include", "any_of") for constraint in yaml_constraints: - validate_values(constraint, allowed_keys) + validate_values(constraint, allowed_keys, validation_ctx) if len(constraint.keys()) > 1: position = constraint.lc.data[list(constraint)[1]] msg = ( "Invalid entry in constraint array - multiple top-level keys in entry." ) - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, None) if "include" in constraint: constraints += (Include(constraint.get("include")),) elif "any_of" in constraint: diff --git a/semantic-conventions/src/opentelemetry/semconv/model/exceptions.py b/semantic-conventions/src/opentelemetry/semconv/model/exceptions.py index 354a3aa7..7e2dd06c 100644 --- a/semantic-conventions/src/opentelemetry/semconv/model/exceptions.py +++ b/semantic-conventions/src/opentelemetry/semconv/model/exceptions.py @@ -19,19 +19,15 @@ class ValidationError(Exception): line -- line in the file where the error occurred column -- column in the file where the error occurred message -- reason of the error + fqn -- identifier of the node that contains the error """ - @classmethod - def from_yaml_pos(cls, pos, msg): - # the yaml parser starts counting from 0 - # while in document is usually reported starting from 1 - return cls(pos[0] + 1, pos[1] + 1, msg) - - def __init__(self, line, column, message): - super().__init__(line, column, message) + def __init__(self, line, column, message, fqn): + super().__init__(line, column, message, fqn) self.message = message self.line = line self.column = column + self.fqn = fqn def __str__(self): - return f"{self.message} - @{self.line}:{self.column}" + return f"{self.message} - @{self.line}:{self.column} ('{self.fqn}')" diff --git a/semantic-conventions/src/opentelemetry/semconv/model/semantic_attribute.py b/semantic-conventions/src/opentelemetry/semconv/model/semantic_attribute.py index 9a53ed7e..233e2e64 100644 --- a/semantic-conventions/src/opentelemetry/semconv/model/semantic_attribute.py +++ b/semantic-conventions/src/opentelemetry/semconv/model/semantic_attribute.py @@ -20,7 +20,6 @@ from ruamel.yaml.comments import CommentedMap, CommentedSeq -from opentelemetry.semconv.model.exceptions import ValidationError from opentelemetry.semconv.model.utils import ( check_no_missing_keys, validate_id, @@ -83,7 +82,7 @@ def is_enum(self): @staticmethod def parse( - prefix, yaml_attributes, strict_validation=True + prefix, yaml_attributes, validation_ctx ) -> "Dict[str, SemanticAttribute]": """This method parses the yaml representation for semantic attributes creating the respective SemanticAttribute objects. @@ -106,18 +105,18 @@ def parse( return attributes for attribute in yaml_attributes: - validate_values(attribute, allowed_keys) + validate_values(attribute, allowed_keys, validation_ctx) attr_id = attribute.get("id") ref = attribute.get("ref") position_data = attribute.lc.data position = position_data[next(iter(attribute))] if attr_id is None and ref is None: msg = "At least one of id or ref is required." - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, attr_id) if attr_id is not None: - validate_id(attr_id, position_data["id"]) + validate_id(attr_id, position_data["id"], validation_ctx) attr_type, brief, examples = SemanticAttribute.parse_attribute( - attribute + attribute, validation_ctx ) if prefix: fqn = f"{prefix}.{attr_id}" @@ -127,14 +126,14 @@ def parse( # Ref attr_type = None if "type" in attribute: - msg = f"Ref attribute '{ref}' must not declare a type" - raise ValidationError.from_yaml_pos(position, msg) + msg = "Ref attribute must not declare a type" + validation_ctx.raise_or_warn(position, msg, ref) if "stability" in attribute: - msg = f"Ref attribute '{ref}' must not override stability" - raise ValidationError.from_yaml_pos(position, msg) + msg = "Ref attribute must not override stability" + validation_ctx.raise_or_warn(position, msg, ref) if "deprecated" in attribute: - msg = f"Ref attribute '{ref}' must not override deprecation status" - raise ValidationError.from_yaml_pos(position, msg) + msg = "Ref attribute must not override deprecation status" + validation_ctx.raise_or_warn(position, msg, ref) brief = attribute.get("brief") examples = attribute.get("examples") ref = ref.strip() @@ -154,7 +153,7 @@ def parse( if len(requirement_level_val) != 1: position = position_data["requirement_level"] msg = "Multiple requirement_level values are not allowed!" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, fqn) recommended_msg = requirement_level_val.get("recommended", None) condition_msg = requirement_level_val.get( @@ -174,7 +173,7 @@ def parse( msg = ( f"Value '{requirement_level_val}' for required field is not allowed" ) - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, fqn) if ( requirement_level == RequirementLevel.CONDITIONALLY_REQUIRED @@ -182,11 +181,11 @@ def parse( ): position = position_data["requirement_level"] msg = "Missing message for conditionally required field!" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, fqn) tag = attribute.get("tag", "").strip() stability = SemanticAttribute.parse_stability( - attribute.get("stability"), position_data, strict_validation + attribute.get("stability"), position_data, fqn, validation_ctx ) if stability == StabilityLevel.EXPERIMENTAL and isinstance( attr_type, EnumAttributeType @@ -197,14 +196,14 @@ def parse( f"Member '{member.member_id}' is marked as stable " + "but it is not allowed on experimental attribute!" ) - raise ValidationError.from_yaml_pos(position_data["type"], msg) + validation_ctx.raise_or_warn(position_data["type"], msg, fqn) deprecated = SemanticAttribute.parse_deprecated( - attribute.get("deprecated"), position_data + attribute.get("deprecated"), position_data, fqn, validation_ctx ) sampling_relevant = ( - AttributeType.to_bool("sampling_relevant", attribute) + AttributeType.to_bool("sampling_relevant", attribute, validation_ctx) if attribute.get("sampling_relevant") else False ) @@ -241,21 +240,19 @@ def parse( + " is already present at line " + str(attributes[fqn].position[0] + 1) ) - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, fqn) attributes[fqn] = attr return attributes @staticmethod - def parse_attribute(attribute): - check_no_missing_keys(attribute, ["type", "brief", "stability"]) + def parse_attribute(attribute, validation_ctx): + check_no_missing_keys(attribute, ["type", "brief", "stability"], validation_ctx) attr_val = attribute["type"] - try: - attr_type = EnumAttributeType.parse(attr_val) - except ValidationError as e: - if e.line != 0: # Does the error already have a position? - raise - position = attribute.lc.data["type"] - raise ValidationError.from_yaml_pos(position, e.message) from e + attr_id = attribute.get("id") + attr_type = EnumAttributeType.parse( + attr_val, attribute.lc.data["type"], validation_ctx + ) + brief = attribute["brief"] examples = attribute.get("examples") is_simple_type = AttributeType.is_simple_type(attr_type) @@ -268,7 +265,7 @@ def parse_attribute(attribute): ): position = attribute.lc.data[list(attribute)[0]] msg = f"Non array examples for {attr_type} are not allowed" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, attr_id) if not isinstance(examples, CommentedSeq) and examples is not None: # TODO: If validation fails later, this will crash when trying to access position data # since a list, contrary to a CommentedSeq, does not have position data @@ -288,7 +285,7 @@ def parse_attribute(attribute): if not examples: position = attribute.lc.data[list(attribute)[0]] msg = f"Empty examples for {attr_type} are not allowed" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, attr_id) if is_template_type: return attr_type, str(brief), examples @@ -301,11 +298,13 @@ def parse_attribute(attribute): # TODO: Implement type check for enum examples or forbid them if examples is not None and is_simple_type: - AttributeType.check_examples_type(attr_type, examples, zlass) + AttributeType.check_examples_type( + attr_type, examples, zlass, attr_id, validation_ctx + ) return attr_type, str(brief), examples @staticmethod - def parse_stability(stability, position_data, strict_validation=True): + def parse_stability(stability, position_data, attr_id, validation_ctx): if stability is None: return None @@ -317,26 +316,22 @@ def parse_stability(stability, position_data, strict_validation=True): if val is not None: return val - # TODO: remove this branch - it's necessary for now to allow back-compat checks against old spec versions - # where we used 'deprecated' as stability level - if not strict_validation and stability == "deprecated": - print( - 'WARNING: Using "deprecated" as stability level is no longer supported. Use "experimental" instead.' - ) - return StabilityLevel.EXPERIMENTAL - msg = f"Value '{stability}' is not allowed as a stability marker" - raise ValidationError.from_yaml_pos(position_data["stability"], msg) + validation_ctx.raise_or_warn(position_data["stability"], msg, attr_id) + # TODO: replace with None - it's necessary for now to give codegen + # a default value for semconv 1.24.0 and lower where we used + # 'deprecated' as stability level + return StabilityLevel.EXPERIMENTAL @staticmethod - def parse_deprecated(deprecated, position_data): + def parse_deprecated(deprecated, position_data, attr_id, validation_ctx): if deprecated is not None: if AttributeType.get_type(deprecated) != "string" or deprecated == "": msg = ( "Deprecated field expects a string that specifies why the attribute is deprecated and/or what" " to use instead! " ) - raise ValidationError.from_yaml_pos(position_data["deprecated"], msg) + validation_ctx.raise_or_warn(position_data["deprecated"], msg, attr_id) return deprecated.strip() return None @@ -415,7 +410,7 @@ def type_mapper(attr_type: str): return type_mapper.get(attr_type) @staticmethod - def check_examples_type(attr_type, examples, zlass): + def check_examples_type(attr_type, examples, zlass, fqn, validation_ctx): """This method checks example are correctly typed""" index = -1 for example in examples: @@ -426,15 +421,15 @@ def check_examples_type(attr_type, examples, zlass): if not isinstance(element, zlass): position = examples.lc.data[index] msg = f"Example with wrong type. Expected {attr_type} examples but is was {type(element)}." - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, fqn) else: # Single value example or array example with a single example array if not isinstance(example, zlass): position = examples.lc.data[index] msg = f"Example with wrong type. Expected {attr_type} examples but is was {type(example)}." - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, fqn) @staticmethod - def to_bool(key, parent_object): + def to_bool(key, parent_object, validation_ctx): """This method translate yaml boolean values to python boolean values""" yaml_value = parent_object.get(key) if isinstance(yaml_value, bool): @@ -446,7 +441,12 @@ def to_bool(key, parent_object): return False position = parent_object.lc.data[key] msg = f"Value '{yaml_value}' for {key} field is not allowed" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn( + position, + msg, + parent_object.get("id"), + ) + return None @dataclass @@ -468,7 +468,7 @@ def is_valid_enum_value(val): return isinstance(val, (int, str)) @staticmethod - def parse(attribute_type): + def parse(attribute_type, position, validation_ctx): """This method parses the yaml representation for semantic attribute types. If the type is an enumeration, it generated the EnumAttributeType object, otherwise it returns the basic type as string. @@ -478,13 +478,12 @@ def parse(attribute_type): attribute_type ) or AttributeType.is_template_type(attribute_type): return attribute_type - # Wrong type used - raise the exception and fill the missing data in the parent - raise ValidationError( - 0, 0, f"Invalid type: {attribute_type} is not allowed" + validation_ctx.raise_or_warn( + position, f"Invalid type: {attribute_type} is not allowed", None ) allowed_keys = ["allow_custom_values", "members"] mandatory_keys = ["members"] - validate_values(attribute_type, allowed_keys, mandatory_keys) + validate_values(attribute_type, allowed_keys, validation_ctx, mandatory_keys) custom_values = ( bool(attribute_type.get("allow_custom_values")) if "allow_custom_values" in attribute_type @@ -495,37 +494,37 @@ def parse(attribute_type): not isinstance(attribute_type["members"], CommentedSeq) or len(attribute_type["members"]) < 1 ): - raise ValidationError.from_yaml_pos( - attribute_type.lc.data["members"], "Enumeration without members!" + validation_ctx.raise_or_warn( + attribute_type.lc.data["members"], + "Enumeration without members!", + attribute_type.get("id"), ) allowed_keys = ["id", "value", "brief", "note", "stability", "deprecated"] - mandatory_keys = ["id", "value"] + mandatory_keys = ["id", "value", "stability"] for member in attribute_type["members"]: - validate_values(member, allowed_keys, mandatory_keys) + validate_values(member, allowed_keys, validation_ctx, mandatory_keys) if not EnumAttributeType.is_valid_enum_value(member["value"]): - raise ValidationError.from_yaml_pos( + validation_ctx.raise_or_warn( member.lc.data["value"][:2], f"Invalid value used in enum: <{member['value']}>", + attribute_type.get("id"), ) - validate_id(member["id"], member.lc.data["id"]) - stability_str = member.get("stability") - if not stability_str: - raise ValidationError.from_yaml_pos( - member.lc.data["id"], - f"Enumeration member '{member['value']}' must have a stability level", - ) + member_id = member["id"] + validate_id(member_id, member.lc.data["id"], validation_ctx) - stability = SemanticAttribute.parse_stability(stability_str, member.lc.data) + stability = SemanticAttribute.parse_stability( + member.get("stability"), member.lc.data, member_id, validation_ctx + ) deprecated = SemanticAttribute.parse_deprecated( - member.get("deprecated"), member.lc.data + member.get("deprecated"), member.lc.data, member_id, validation_ctx ) members.append( EnumMember( - member_id=member["id"], + member_id=member_id, value=member["value"], - brief=member.get("brief", member["id"]).strip(), + brief=member.get("brief", member_id).strip(), note=member.get("note", "").strip(), stability=stability, deprecated=deprecated, @@ -534,9 +533,10 @@ def parse(attribute_type): enum_type = AttributeType.get_type(members[0].value) for myaml, m in zip(attribute_type["members"], members): if enum_type != AttributeType.get_type(m.value): - raise ValidationError.from_yaml_pos( + validation_ctx.raise_or_warn( myaml.lc.data["value"], f"Enumeration member does not have type {enum_type}!", + m.member_id, ) return EnumAttributeType(custom_values, members, enum_type) diff --git a/semantic-conventions/src/opentelemetry/semconv/model/semantic_convention.py b/semantic-conventions/src/opentelemetry/semconv/model/semantic_convention.py index b9893945..537e335a 100644 --- a/semantic-conventions/src/opentelemetry/semconv/model/semantic_convention.py +++ b/semantic-conventions/src/opentelemetry/semconv/model/semantic_convention.py @@ -28,7 +28,12 @@ SemanticAttribute, ) from opentelemetry.semconv.model.unit_member import UnitMember -from opentelemetry.semconv.model.utils import ValidatableYamlNode, validate_id +from opentelemetry.semconv.model.utils import ( + ValidatableYamlNode, + ValidationContext, + check_no_missing_keys, + validate_id, +) class SpanKind(Enum): @@ -60,16 +65,17 @@ def parse_semantic_convention_type(type_value): return CONVENTION_CLS_BY_GROUP_TYPE.get(type_value) -def parse_semantic_convention_groups(yaml_file, strict_validation=True): +def parse_semantic_convention_groups(yaml_file, validation_ctx): yaml = YAML().load(yaml_file) models = [] for group in yaml["groups"]: - models.append(SemanticConvention(group, strict_validation)) + models.append(SemanticConvention(group, validation_ctx)) return models -def SemanticConvention(group, strict_validation=True): +def SemanticConvention(group, validation_ctx): type_value = group.get("type") + semconv_id = group.get("id") if type_value is None: line = group.lc.data["id"][0] + 1 doc_url = "https://github.com/open-telemetry/build-tools/blob/main/semantic-conventions/syntax.md#groups" @@ -82,11 +88,11 @@ def SemanticConvention(group, strict_validation=True): if convention_type is None: position = group.lc.data["type"] if "type" in group else group.lc.data["id"] msg = f"Invalid value for semantic convention type: {group.get('type')}" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, semconv_id) # First, validate that the correct fields are available in the yaml - convention_type.validate_keys(group) - model = convention_type(group, strict_validation) + convention_type.validate_keys(group, validation_ctx) + model = convention_type(group, validation_ctx) # Also, validate that the value of the fields is acceptable model.validate_values() return model @@ -141,24 +147,27 @@ def comparison_key(attr): key=comparison_key, ) - def __init__(self, group, strict_validation=True): - super().__init__(group) + def __init__(self, group, validation_ctx): + super().__init__(group, validation_ctx) self.semconv_id = self.id self.note = group.get("note", "").strip() self.prefix = group.get("prefix", "").strip() + self.validation_ctx = validation_ctx position_data = group.lc.data self.stability = SemanticAttribute.parse_stability( - group.get("stability"), position_data, strict_validation + group.get("stability"), position_data, self.semconv_id, validation_ctx ) self.deprecated = SemanticAttribute.parse_deprecated( - group.get("deprecated"), position_data + group.get("deprecated"), position_data, self.semconv_id, validation_ctx ) self.extends = group.get("extends", "").strip() self.events = group.get("events", ()) - self.constraints = parse_constraints(group.get("constraints", ())) + self.constraints = parse_constraints( + group.get("constraints", ()), validation_ctx + ) self.attrs_by_name = SemanticAttribute.parse( - self.prefix, group.get("attributes"), strict_validation + self.prefix, group.get("attributes"), validation_ctx ) def contains_attribute(self, attr: "SemanticAttribute"): @@ -182,7 +191,7 @@ def has_attribute_constraint(self, attr): def validate_values(self): super().validate_values() if self.prefix: - validate_id(self.prefix, self._position) + validate_id(self.prefix, self._position, self.validation_ctx) class ResourceSemanticConvention(BaseSemanticConvention): @@ -205,13 +214,13 @@ class SpanSemanticConvention(BaseSemanticConvention): "span_kind", ) - def __init__(self, group, strict_validation=True): - super().__init__(group) + def __init__(self, group, validation_ctx): + super().__init__(group, validation_ctx) self.span_kind = SpanKind.parse(group.get("span_kind")) if self.span_kind is None: position = group.lc.data["span_kind"] msg = f"Invalid value for span_kind: {group.get('span_kind')}" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, group.get("id")) class EventSemanticConvention(BaseSemanticConvention): @@ -219,12 +228,14 @@ class EventSemanticConvention(BaseSemanticConvention): allowed_keys = BaseSemanticConvention.allowed_keys + ("name",) - def __init__(self, group, strict_validation=True): - super().__init__(group) + def __init__(self, group, validation_ctx): + super().__init__(group, validation_ctx) self.name = group.get("name", self.prefix) if not self.name: - raise ValidationError.from_yaml_pos( - self._position, "Event must define at least one of name or prefix" + validation_ctx.raise_or_warn( + self._position, + "Event must define at least one of name or prefix", + group.get("id"), ) @@ -238,9 +249,9 @@ class UnitSemanticConvention(BaseSemanticConvention): "members", ) - def __init__(self, group, strict_validation=True): - super().__init__(group) - self.members = UnitMember.parse(group.get("members")) + def __init__(self, group, validation_ctx): + super().__init__(group, validation_ctx) + self.members = UnitMember.parse(group.get("members"), validation_ctx) class MetricGroupSemanticConvention(BaseSemanticConvention): @@ -267,28 +278,29 @@ class MetricSemanticConvention(MetricGroupSemanticConvention): canonical_instrument_name_by_yaml_name.keys() ) - def __init__(self, group, strict_validation=True): - super().__init__(group) + def __init__(self, group, validation_ctx): + super().__init__(group, validation_ctx) self.metric_name = group.get("metric_name") self.unit = group.get("unit") self.instrument = group.get("instrument") + self.validation_ctx = validation_ctx namespaces = self.metric_name.split(".") self.root_namespace = namespaces[0] if len(namespaces) > 1 else "" - self.validate() + self.validate(group) - def validate(self): - val_tuple = (self.metric_name, self.unit, self.instrument, self.stability) - if not all(val_tuple): - raise ValidationError.from_yaml_pos( - self._position, - "All of metric_name, units, instrument, and stability must be defined", - ) + def validate(self, yaml): + check_no_missing_keys( + yaml, + ["metric_name", "unit", "instrument", "stability"], + self.validation_ctx, + ) if self.instrument not in self.allowed_instruments: - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( self._position, f"Instrument '{self.instrument}' is not a valid instrument name", + self.metric_name, ) @@ -301,17 +313,19 @@ class SemanticConventionSet: debug: bool models: typing.Dict[str, BaseSemanticConvention] = field(default_factory=dict) errors: bool = False + validation_ctx: Optional[ValidationContext] = None - def parse(self, file, strict_validation=True): + def parse(self, file, validation_ctx=None): + self.validation_ctx = validation_ctx or ValidationContext(file, True) with open(file, "r", encoding="utf-8") as yaml_file: try: semconv_models = parse_semantic_convention_groups( - yaml_file, strict_validation + yaml_file, self.validation_ctx ) for model in semconv_models: if model.semconv_id in self.models: self.errors = True - print(f"Error parsing {file}\n", file=sys.stderr) + print(f"\nError parsing {file}:", file=sys.stderr) print( f"Semantic convention '{model.semconv_id}' is already defined.", file=sys.stderr, @@ -319,8 +333,9 @@ def parse(self, file, strict_validation=True): self.models[model.semconv_id] = model except ValidationError as e: self.errors = True - print(f"Error parsing {file}\n", file=sys.stderr) + print(f"\nError parsing {file}:", file=sys.stderr) print(e, file=sys.stderr) + print() def has_error(self): return self.errors @@ -384,17 +399,18 @@ def _populate_extends_single(self, semconv, unprocessed): Resolves the parent/child relationship for a single Semantic Convention. If the parent **p** of the input semantic convention **i** has in turn a parent **pp**, it recursively resolves **pp** before processing **p**. :param semconv: The semantic convention for which resolve the parent/child relationship. - :param semconvs: The list of remaining semantic conventions to process. + :param unprocessed: The list of remaining semantic conventions to process. :return: A list of remaining semantic convention to process. """ # Resolve parent of current Semantic Convention if semconv.extends: extended = self.models.get(semconv.extends) if extended is None: - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( semconv._position, f"Semantic Convention {semconv.semconv_id} extends " f"{semconv.extends} but the latter cannot be found!", + semconv.semconv_id, ) # Process hierarchy chain @@ -435,10 +451,11 @@ def _populate_anyof_attributes(self): for attr_id in attr_ids: ref_attr = self._lookup_attribute(attr_id) if ref_attr is None: - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( any_of._yaml_src_position[index], - f"Any_of attribute '{attr_id}' of semantic convention " - "{semconv.semconv_id} does not exists!", + f"Any_of attribute '{attr_id}' of semantic " + "convention {semconv.semconv_id} does not exist!", + attr_id, ) constraint_attrs.append(ref_attr) if constraint_attrs: @@ -450,16 +467,18 @@ def _populate_events(self): for event_id in semconv.events: event = self.models.get(event_id) if event is None: - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( semconv._position, f"Semantic Convention {semconv.semconv_id} has " "{event_id} as event but the latter cannot be found!", + event_id, ) if not isinstance(event, EventSemanticConvention): - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( semconv._position, f"Semantic Convention {semconv.semconv_id} has {event_id} as event but" " the latter is not a semantic convention for events!", + event_id, ) events.append(event) semconv.events = events @@ -474,9 +493,10 @@ def resolve_ref(self, semconv): fixpoint_ref = False ref_attr = self._lookup_attribute(attr.ref) if not ref_attr: - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( semconv._position, f"Semantic Convention {semconv.semconv_id} reference `{attr.ref}` but it cannot be found!", + semconv.semconv_id, ) attr = self._merge_attribute(attr, ref_attr) return fixpoint_ref @@ -515,10 +535,11 @@ def resolve_include(self, semconv): include_semconv = self.models.get(constraint.semconv_id) # include required attributes and constraints if include_semconv is None: - raise ValidationError.from_yaml_pos( + self.validation_ctx.raise_or_warn( semconv._position, f"Semantic Convention {semconv.semconv_id} includes " "{constraint.semconv_id} but the latter cannot be found!", + semconv.semconv_id, ) # We resolve the parent/child relationship of the included semantic convention, if any self._populate_extends_single( diff --git a/semantic-conventions/src/opentelemetry/semconv/model/unit_member.py b/semantic-conventions/src/opentelemetry/semconv/model/unit_member.py index 92490a34..3068449b 100644 --- a/semantic-conventions/src/opentelemetry/semconv/model/unit_member.py +++ b/semantic-conventions/src/opentelemetry/semconv/model/unit_member.py @@ -6,16 +6,16 @@ class UnitMember(ValidatableYamlNode): allowed_keys = ("id", "brief", "value") mandatory_keys = allowed_keys - def __init__(self, node): - super().__init__(node) + def __init__(self, node, validation_ctx): + super().__init__(node, validation_ctx) self.value = node.get("value") @staticmethod - def parse(members): + def parse(members, validation_ctx): parsed_members = {} for member in members: - UnitMember.validate_keys(member) - unit_member = UnitMember(member) + UnitMember.validate_keys(member, validation_ctx) + unit_member = UnitMember(member, validation_ctx) unit_member.validate_values() parsed_members[unit_member.id] = unit_member diff --git a/semantic-conventions/src/opentelemetry/semconv/model/utils.py b/semantic-conventions/src/opentelemetry/semconv/model/utils.py index 291d28f5..391acc2b 100644 --- a/semantic-conventions/src/opentelemetry/semconv/model/utils.py +++ b/semantic-conventions/src/opentelemetry/semconv/model/utils.py @@ -23,31 +23,33 @@ Each dot must be followed by at least one allowed non-dot character.""" -def validate_id(semconv_id, position): +def validate_id(semconv_id, position, validation_ctx): if not ID_RE.fullmatch(semconv_id): - raise ValidationError.from_yaml_pos( + validation_ctx.raise_or_warn( position, f"Invalid id {semconv_id}. Semantic Convention ids MUST match {ID_RE.pattern}", + semconv_id, ) -def validate_values(yaml, keys, mandatory=()): +def validate_values(yaml, keys, validation_ctx, mandatory=()): """This method checks only valid keywords and value types are used""" + node_id = yaml.get("id") unwanted = [k for k in yaml.keys() if k not in keys] if unwanted: position = yaml.lc.data[unwanted[0]] msg = f"Invalid keys: {unwanted}" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, node_id) if mandatory: - check_no_missing_keys(yaml, mandatory) + check_no_missing_keys(yaml, mandatory, validation_ctx) -def check_no_missing_keys(yaml, mandatory): +def check_no_missing_keys(yaml, mandatory, validation_ctx): missing = list(set(mandatory) - set(yaml)) if missing: position = (yaml.lc.line, yaml.lc.col) msg = f"Missing keys: {missing}" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, yaml.get("id")) class ValidatableYamlNode: @@ -55,22 +57,23 @@ class ValidatableYamlNode: allowed_keys = () # type: Tuple[str, ...] mandatory_keys = ("id", "brief") # type: Tuple[str, ...] - def __init__(self, yaml_node): + def __init__(self, yaml_node, validation_ctx): self.id = yaml_node.get("id").strip() self.brief = str(yaml_node.get("brief")).strip() + self.validation_ctx = validation_ctx self._position = (yaml_node.lc.line, yaml_node.lc.col) @classmethod - def validate_keys(cls, node): + def validate_keys(cls, node, validation_ctx): unwanted = [key for key in node.keys() if key not in cls.allowed_keys] if unwanted: position = node.lc.data[unwanted[0]] msg = f"Invalid keys: {unwanted}" - raise ValidationError.from_yaml_pos(position, msg) + validation_ctx.raise_or_warn(position, msg, node.get("id")) if cls.mandatory_keys: - check_no_missing_keys(node, cls.mandatory_keys) + check_no_missing_keys(node, cls.mandatory_keys, validation_ctx) def validate_values(self): """ @@ -78,4 +81,18 @@ def validate_values(self): This method should raise an exception with a descriptive message if the semantic convention is not valid. """ - validate_id(self.id, self._position) + validate_id(self.id, self._position, self.validation_ctx) + + +class ValidationContext: + def __init__(self, file_name: str, strict_validation: bool): + self.strict_validation = strict_validation + self.file_name = file_name + + def raise_or_warn(self, pos, msg, fqn): + # the yaml parser starts counting from 0 + # while in document is usually reported starting from 1 + error = ValidationError(pos[0] + 1, pos[1] + 1, msg, fqn) + if self.strict_validation: + raise error + print(f"[Warning] {self.file_name}: {error}\n") diff --git a/semantic-conventions/src/tests/data/yaml/errors/enum/enum_with_double_values.yaml b/semantic-conventions/src/tests/data/yaml/errors/enum/enum_with_double_values.yaml index 02b97e43..f3fd1b9d 100644 --- a/semantic-conventions/src/tests/data/yaml/errors/enum/enum_with_double_values.yaml +++ b/semantic-conventions/src/tests/data/yaml/errors/enum/enum_with_double_values.yaml @@ -9,8 +9,10 @@ groups: members: - id: OK value: 0.0 + stability: experimental - id: CANCELLED value: 1.0 + stability: experimental requirement_level: required brief: "The [numeric status code](https://github.com/grpc/grpc/blob/v1.33.2/doc/statuscodes.md) of the gRPC request." examples: [0.0, 1.0] diff --git a/semantic-conventions/src/tests/semconv/model/test_error_detection.py b/semantic-conventions/src/tests/semconv/model/test_error_detection.py index 62dfe360..a6225b73 100644 --- a/semantic-conventions/src/tests/semconv/model/test_error_detection.py +++ b/semantic-conventions/src/tests/semconv/model/test_error_detection.py @@ -22,6 +22,7 @@ SemanticConventionSet, parse_semantic_convention_groups, ) +from opentelemetry.semconv.model.utils import ValidationContext class TestCorrectErrorDetection(unittest.TestCase): @@ -118,8 +119,9 @@ def test_no_override_type(self): self.fail() e = ex.exception msg = e.message.lower() - self.assertIn("must not declare a type", msg) - self.assertIn("'test'", msg) + self.assertEqual("ref attribute must not declare a type", msg) + self.assertEqual("test", e.fqn) + self.assertIn("'test'", str(e)) self.assertEqual(e.line, 8) def test_invalid_key_in_constraint(self): @@ -160,7 +162,8 @@ def test_ref_override_stability(self): self.fail() e = ex.exception msg = e.message.lower() - self.assertIn("ref attribute 'test_attr' must not override stability", msg) + self.assertEqual("ref attribute must not override stability", msg) + self.assertEqual("test_attr", e.fqn) self.assertEqual(e.line, 14) def test_invalid_semconv_stability_with_deprecated(self): @@ -191,9 +194,16 @@ def test_missing_stability_value_on_enum_member(self): self.fail() e = ex.exception msg = e.message.lower() - self.assertIn("enumeration member 'one' must have a stability level", msg) + self.assertIn("missing keys: ['stability']", msg) self.assertEqual(e.line, 10) + def test_missing_stability_value_on_enum_member_not_strict(self): + path = "yaml/errors/stability/missing_stability_on_enum_member.yaml" + with open(self.load_file(path), encoding="utf-8") as file: + return parse_semantic_convention_groups( + file, ValidationContext(path, False) + ) + def test_multiple_stability_values_on_enum_member(self): with self.assertRaises(DuplicateKeyError): self.open_yaml( @@ -234,10 +244,11 @@ def test_extends_overrides_deprecation(self): self.fail() e = ex.exception msg = e.message.lower() - self.assertIn( - "ref attribute 'test.convention_version' must not override deprecation status", + self.assertEqual( + "ref attribute must not override deprecation status", msg, ) + self.assertEqual("test.convention_version", e.fqn) self.assertEqual(e.line, 19) def test_ref_overrides_deprecation(self): @@ -246,10 +257,11 @@ def test_ref_overrides_deprecation(self): self.fail() e = ex.exception msg = e.message.lower() - self.assertIn( - "ref attribute 'test.convention_version' must not override deprecation status", + self.assertEqual( + "ref attribute must not override deprecation status", msg, ) + self.assertEqual("test.convention_version", e.fqn) self.assertEqual(e.line, 17) def test_invalid_deprecated_boolean(self): @@ -498,7 +510,7 @@ def test_validate_anyof_attributes(self): e = ex.exception msg = e.message.lower() self.assertIn("any_of attribute", msg) - self.assertIn("does not exists", msg) + self.assertIn("does not exist", msg) self.assertEqual(e.line, 16) def test_missing_event(self): @@ -553,7 +565,7 @@ def test_multiple_requirement_level_values(self): def open_yaml(self, path): with open(self.load_file(path), encoding="utf-8") as file: - return parse_semantic_convention_groups(file) + return parse_semantic_convention_groups(file, ValidationContext(path, True)) _TEST_DIR = os.path.dirname(__file__) diff --git a/semantic-conventions/src/tests/semconv/model/test_semantic_attribute.py b/semantic-conventions/src/tests/semconv/model/test_semantic_attribute.py index 9a8ef067..28e4fe6c 100644 --- a/semantic-conventions/src/tests/semconv/model/test_semantic_attribute.py +++ b/semantic-conventions/src/tests/semconv/model/test_semantic_attribute.py @@ -15,11 +15,14 @@ import re from opentelemetry.semconv.model.semantic_attribute import SemanticAttribute +from opentelemetry.semconv.model.utils import ValidationContext def test_parse(load_yaml): yaml = load_yaml("semantic_attributes.yml") - attributes = SemanticAttribute.parse("prefix", yaml.get("attributes")) + attributes = SemanticAttribute.parse( + "prefix", yaml.get("attributes"), ValidationContext(None, True) + ) assert len(attributes) == 3 @@ -45,7 +48,9 @@ def test_parse(load_yaml): def test_parse_deprecated(load_yaml): yaml = load_yaml("semantic_attributes_deprecated.yml") - attributes = SemanticAttribute.parse("", yaml.get("attributes")) + attributes = SemanticAttribute.parse( + "", yaml.get("attributes"), ValidationContext(None, True) + ) assert len(attributes) == 1 assert list(attributes.keys()) == ["deprecated_attribute"] @@ -62,7 +67,9 @@ def test_parse_regex(): def test_parse_attribute_templates(load_yaml): yaml = load_yaml("attribute_templates.yml") - attribute_templates = SemanticAttribute.parse("prefix", yaml.get("attributes")) + attribute_templates = SemanticAttribute.parse( + "prefix", yaml.get("attributes"), ValidationContext(None, True) + ) assert len(attribute_templates) == 3 diff --git a/semantic-conventions/src/tests/semconv/model/test_semantic_convention.py b/semantic-conventions/src/tests/semconv/model/test_semantic_convention.py index df7a133f..98d9a208 100644 --- a/semantic-conventions/src/tests/semconv/model/test_semantic_convention.py +++ b/semantic-conventions/src/tests/semconv/model/test_semantic_convention.py @@ -18,11 +18,14 @@ SpanKind, parse_semantic_convention_groups, ) +from opentelemetry.semconv.model.utils import ValidationContext def test_parse_basic(open_test_file): with open_test_file(os.path.join("yaml", "basic_example.yml")) as yaml_file: - conventions = parse_semantic_convention_groups(yaml_file) + conventions = parse_semantic_convention_groups( + yaml_file, ValidationContext(open_test_file, True) + ) assert conventions is not None assert len(conventions) == 2 diff --git a/semantic-conventions/src/tests/semconv/model/test_semantic_convention_units.py b/semantic-conventions/src/tests/semconv/model/test_semantic_convention_units.py index 5728a749..c2cdc880 100644 --- a/semantic-conventions/src/tests/semconv/model/test_semantic_convention_units.py +++ b/semantic-conventions/src/tests/semconv/model/test_semantic_convention_units.py @@ -7,11 +7,14 @@ UnitSemanticConvention, parse_semantic_convention_groups, ) +from opentelemetry.semconv.model.utils import ValidationContext def test_build_units(open_test_file): with open_test_file(os.path.join("yaml", "metrics", "units.yaml")) as yaml_file: - conventions = parse_semantic_convention_groups(yaml_file) + conventions = parse_semantic_convention_groups( + yaml_file, ValidationContext(open_test_file, True) + ) assert len(conventions) == 1 convention = conventions[0] @@ -42,5 +45,7 @@ def test_build_units_bad(open_test_file): with pytest.raises(ValidationError) as excinfo, open_test_file( os.path.join("yaml", "metrics", "units_bad_with_attributes.yaml") ) as yaml_file: - parse_semantic_convention_groups(yaml_file) + parse_semantic_convention_groups( + yaml_file, ValidationContext(open_test_file, True) + ) assert "attributes" in str(excinfo.value) diff --git a/semantic-conventions/src/tests/semconv/model/test_utils.py b/semantic-conventions/src/tests/semconv/model/test_utils.py index 1bffee42..61875f60 100644 --- a/semantic-conventions/src/tests/semconv/model/test_utils.py +++ b/semantic-conventions/src/tests/semconv/model/test_utils.py @@ -14,7 +14,11 @@ import pytest from opentelemetry.semconv.model.exceptions import ValidationError -from opentelemetry.semconv.model.utils import validate_id, validate_values +from opentelemetry.semconv.model.utils import ( + ValidationContext, + validate_id, + validate_values, +) _POSITION = [10, 2] @@ -35,7 +39,7 @@ def test_validate_id__valid(semconv_id): # a lowercase letter. The rest of the id may contain only lowercase letters, # numbers, underscores, and dashes - validate_id(semconv_id, _POSITION) + validate_id(semconv_id, _POSITION, True) @pytest.mark.parametrize( @@ -43,11 +47,15 @@ def test_validate_id__valid(semconv_id): ) def test_validate_id__invalid(semconv_id): with pytest.raises(ValidationError) as err: - validate_id(semconv_id, _POSITION) + validate_id(semconv_id, _POSITION, ValidationContext(None, True)) assert err.value.message.startswith("Invalid id") +def test_validate_id__invalid_not_strict(): + validate_id("123", _POSITION, ValidationContext(None, False)) + + @pytest.mark.parametrize( "allowed, mandatory", [ @@ -63,7 +71,7 @@ def test_validate_values(load_yaml, allowed, mandatory): conventions = load_yaml("basic_example.yml") yaml = conventions["groups"][0] - validate_values(yaml, allowed, mandatory) + validate_values(yaml, allowed, ValidationContext(None, True), mandatory) @pytest.mark.parametrize( @@ -96,6 +104,6 @@ def test_validate_values__invalid(load_yaml, allowed, mandatory, expected_messag yaml = conventions["groups"][0] with pytest.raises(ValidationError) as err: - validate_values(yaml, allowed, mandatory) + validate_values(yaml, allowed, ValidationContext(None, True), mandatory) assert err.value.message == expected_message