Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pick the deepest error among the most relevant ones in each subschema #1258

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions jsonschema/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,6 @@ def relevance(error):
validator = error.validator
return ( # prefer errors which are ...
-len(error.path), # 'deeper' and thereby more specific
error.path, # earlier (for sibling errors)
validator not in weak, # for a non-low-priority keyword
validator in strong, # for a high priority keyword
not error._matches_type(), # at least match the instance's type
Expand Down Expand Up @@ -427,7 +426,8 @@ def best_match(errors, key=relevance):
since they indicate "more" is wrong with the instance.

If the resulting match is either :kw:`oneOf` or :kw:`anyOf`, the
*opposite* assumption is made -- i.e. the deepest error is picked,
*opposite* assumption is made -- i.e. the deepest error is picked
among the most relevant errors in each separate subschema,
since these keywords only need to match once, and any other errors
may not be relevant.

Expand Down Expand Up @@ -464,9 +464,19 @@ def best_match(errors, key=relevance):
best = max(itertools.chain([best], errors), key=key)

while best.context:
# Calculate the most relevant error in each separate subschema
best_in_subschemas = [None] * len(best.validator_value)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know I'm still behind on looking at this, but just to mention why I haven't yet merged it, I want to stare at this closer, as I'm somewhat suspicious of the types here -- specifically, it's likely true that validator_value is always a container for any builtin JSON Schema keywords, but certainly not in general (i.e. someone can invent some other one).

And similarly below I'm slightly suspicious of the schema_path business.

Not to say any of it looks wrong, but I want to convince myself if it needs any additional hardening.

And of course again thanks for the thought and PR, I'll get to it sometime soon I hope (days not weeks).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If assuming that only oneOf and anyOf can have non-empty context, then schema_path is seemingly always an integer because it was produced from enumerating https://github.com/python-jsonschema/jsonschema/blob/v4.22.0/jsonschema/_keywords.py#L340
This also implies that yes, validator_value may be something that can be enumerated, but without len.

Alternatively, assuming that best.context has errors in strict order of .schema_path (see also the same algorithms _keywords.anyOf | oneOf):

        best_in_subschemas = []
        for error in best.context:
            index = error.schema_path[0]
            if index == len(best_in_subschemas):
                best_in_subschemas.append(error)
            else:
                prev = best_in_subschemas[index]
                best_in_subschemas[index] = max(prev, error, key=key)

for error in best.context:
index = error.schema_path[0]
prev = best_in_subschemas[index]
if prev is None:
best_in_subschemas[index] = error
else:
best_in_subschemas[index] = max(prev, error, key=key)

# Calculate the minimum via nsmallest, because we don't recurse if
# all nested errors have the same relevance (i.e. if min == max == all)
smallest = heapq.nsmallest(2, best.context, key=key)
smallest = heapq.nsmallest(2, best_in_subschemas, key=key)
if len(smallest) == 2 and key(smallest[0]) == key(smallest[1]): # noqa: PLR2004
return best
best = smallest[0]
Expand Down
134 changes: 130 additions & 4 deletions jsonschema/tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,53 @@ def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self):
best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
self.assertEqual(best.validator_value, "array")

def test_anyOf_traversal_for_single_shallower_errors_better_match(self):
"""
Traverse the context of an anyOf error with the only subschema,
and select the most relevant error.
"""

schema = {
"anyOf": [
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_anyOf_traversal_least_relevant_among_most_relevant_errors(self):
"""
Traverse the context of an anyOf error, and select
the *least* relevant error among the most relevant errors
in each separate subschema.

I.e. since only one of the schemas must match, we look for the most
specific one, and choose the most relevant error produced by it.
"""

schema = {
"anyOf": [
{"type": "string"},
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_no_anyOf_traversal_for_equally_relevant_errors(self):
"""
We don't traverse into an anyOf (as above) if all of its context errors
Expand All @@ -86,6 +133,21 @@ def test_no_anyOf_traversal_for_equally_relevant_errors(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_no_anyOf_traversal_for_two_properties_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"anyOf": [
{"properties": {"foo": {"type": "string"}}},
{"properties": {"bar": {"type": "string"}}},
],
}
best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_anyOf_traversal_for_single_equally_relevant_error(self):
"""
We *do* traverse anyOf with a single nested error, even though it is
Expand All @@ -100,7 +162,7 @@ def test_anyOf_traversal_for_single_equally_relevant_error(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "type")

def test_anyOf_traversal_for_single_sibling_errors(self):
def test_anyOf_traversal_for_single_sibling_errors_choose_first(self):
"""
We *do* traverse anyOf with a single subschema that fails multiple
times (e.g. on multiple items).
Expand All @@ -111,8 +173,9 @@ def test_anyOf_traversal_for_single_sibling_errors(self):
{"items": {"const": 37}},
],
}
best = self.best_match_of(instance=[12, 12], schema=schema)
best = self.best_match_of(instance=[12, 13], schema=schema)
self.assertEqual(best.validator, "const")
self.assertEqual(best.instance, 12)

def test_anyOf_traversal_for_non_type_matching_sibling_errors(self):
"""
Expand Down Expand Up @@ -152,6 +215,53 @@ def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self):
best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
self.assertEqual(best.validator_value, "array")

def test_oneOf_traversal_for_single_shallower_errors_better_match(self):
"""
Traverse the context of an oneOf error with the only subschema,
and select the most relevant error.
"""

schema = {
"oneOf": [
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_oneOf_traversal_least_relevant_among_most_relevant_errors(self):
"""
Traverse the context of an oneOf error, and select
the *least* relevant error among the most relevant errors
in each separate subschema.

I.e. since only one of the schemas must match, we look for the most
specific one, and choose the most relevant error produced by it.
"""

schema = {
"oneOf": [
{"type": "string"},
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_no_oneOf_traversal_for_equally_relevant_errors(self):
"""
We don't traverse into an oneOf (as above) if all of its context errors
Expand All @@ -168,6 +278,21 @@ def test_no_oneOf_traversal_for_equally_relevant_errors(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_no_oneOf_traversal_for_two_properties_sibling_errors(self):
"""
We don't traverse into an oneOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"oneOf": [
{"properties": {"foo": {"type": "string"}}},
{"properties": {"bar": {"type": "string"}}},
],
}
best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_oneOf_traversal_for_single_equally_relevant_error(self):
"""
We *do* traverse oneOf with a single nested error, even though it is
Expand All @@ -182,7 +307,7 @@ def test_oneOf_traversal_for_single_equally_relevant_error(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "type")

def test_oneOf_traversal_for_single_sibling_errors(self):
def test_oneOf_traversal_for_single_sibling_errors_choose_first(self):
"""
We *do* traverse oneOf with a single subschema that fails multiple
times (e.g. on multiple items).
Expand All @@ -193,8 +318,9 @@ def test_oneOf_traversal_for_single_sibling_errors(self):
{"items": {"const": 37}},
],
}
best = self.best_match_of(instance=[12, 12], schema=schema)
best = self.best_match_of(instance=[12, 13], schema=schema)
self.assertEqual(best.validator, "const")
self.assertEqual(best.instance, 12)

def test_oneOf_traversal_for_non_type_matching_sibling_errors(self):
"""
Expand Down
Loading