Skip to content

Commit

Permalink
Add test for expected behavior of sorted Turtle output
Browse files Browse the repository at this point in the history
This patch adds a test that begins to specify what sorted Turtle output
should look like.  It is intended to start a discussion about the
expectations for blank node sorting, and to set an initial interface for
triggering sorted output via a keyword argument propagated through
`Graph.serialize()`.

This patch will fail CI, but should not fail for code-style reasons.
The new test script was reviewed with black, flake8, isort, and
mypy (--strict).

References:
* RDFLib#1890

Signed-off-by: Alex Nelson <[email protected]>
  • Loading branch information
ajnelson-nist committed Jun 1, 2022
1 parent 4ded2eb commit 88e7287
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions test/test_turtle_sort_issue1890.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python3

# This software was developed at the National Institute of Standards
# and Technology by employees of the Federal Government in the course
# of their official duties. Pursuant to title 17 Section 105 of the
# United States Code this software is not subject to copyright
# protection and is in the public domain. NIST assumes no
# responsibility whatsoever for its use by other parties, and makes
# no guarantees, expressed or implied, about its quality,
# reliability, or any other characteristic.
#
# We would appreciate acknowledgement if the software is used.

import random
from collections import defaultdict
from typing import DefaultDict, List

from rdflib import RDFS, BNode, Graph, Literal, Namespace, URIRef


def test_sort_semiblank_graph() -> None:
    """
    Check that sorted Turtle output is stable when a graph containing
    blank nodes is rebuilt repeatedly.

    The same set of triples is assembled nine times, with the
    blank-node triples added in a freshly shuffled order on each pass.
    Each graph is serialized with ``sort=True``; every pass is expected
    to yield the same text, and that text is expected to match the
    reference serialization at the end of this test.
    """

    EX = Namespace("http://example.org/ex/")

    # Maps each distinct serialization text to the number of passes
    # that produced it.
    times_seen: DefaultDict[str, int] = defaultdict(int)

    # Holds the first non-empty serialization, for comparison against
    # the reference text.
    first_graph_text: str = ""

    # A seeded generator makes the shuffle orders reproducible from one
    # test run to the next.
    seeded_rng = random.Random(1234)
    for _ in range(1, 10):
        graph = Graph()
        graph.bind("ex", EX)
        graph.bind("rdfs", RDFS)

        for subject, comment in (
            (EX.A, "Thing A"),
            (EX.B, "Thing B"),
            (EX.C, "Thing C"),
        ):
            graph.add((subject, RDFS.comment, Literal(comment)))

        # EX.B is listed twice on purpose: two distinct blank nodes end
        # up pointing at it.
        flat_targets: List[URIRef] = [EX.A, EX.B, EX.C, EX.B]
        seeded_rng.shuffle(flat_targets)
        for target in flat_targets:
            # A new blank node is created for every list entry.
            graph.add((BNode(), RDFS.seeAlso, target))

        nested_targets: List[URIRef] = [EX.A, EX.B, EX.C]
        seeded_rng.shuffle(nested_targets)
        for target in nested_targets:
            # Blank node referring to a second blank node, which in
            # turn refers to the named node.
            outer_bnode = BNode()
            inner_bnode = BNode()
            graph.add((outer_bnode, EX.has, inner_bnode))
            graph.add((inner_bnode, RDFS.seeAlso, target))

        graph_text = graph.serialize(format="turtle", sort=True)
        if first_graph_text == "":
            first_graph_text = graph_text

        times_seen[graph_text] += 1

    expected_serialization = """
@prefix ex: <http://example.org/ex/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
ex:A rdfs:comment "Thing A" .
ex:B rdfs:comment "Thing B" .
ex:C rdfs:comment "Thing C" .
[] ex:has [ rdfs:seeAlso ex:A ] .
[] ex:has [ rdfs:seeAlso ex:B ] .
[] ex:has [ rdfs:seeAlso ex:C ] .
[] rdfs:seeAlso ex:A .
[] rdfs:seeAlso ex:B .
[] rdfs:seeAlso ex:B .
[] rdfs:seeAlso ex:C .
"""

    # Surrounding whitespace is ignored on both sides of the comparison.
    assert expected_serialization.strip() == first_graph_text.strip()
    # Exactly one distinct text means every pass serialized identically.
    assert 1 == len(times_seen)

0 comments on commit 88e7287

Please sign in to comment.