Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/blazegraph #521

Merged
merged 20 commits into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions backend/feedback/migrations/0003_alter_feedback_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.12 on 2022-04-12 10:11

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter the ``id`` field of the ``feedback`` model to a ``BigAutoField``."""

    # Depends on migration 0002 of the ``feedback`` app.
    dependencies = [('feedback', '0002_auto_20200401_1436')]

    operations = [
        migrations.AlterField(
            model_name='feedback',
            name='id',
            field=models.BigAutoField(
                auto_created=True,
                primary_key=True,
                serialize=False,
                verbose_name='ID',
            ),
        ),
    ]
33 changes: 33 additions & 0 deletions backend/items/migrations/0006_auto_20220412_1211.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 3.2.12 on 2022-04-12 10:11

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``id`` on three ``items`` models and ``semanticquery.query``.

    The ``id`` field of ``editcounter``, ``itemcounter`` and ``semanticquery``
    becomes a ``BigAutoField``; ``semanticquery.query`` becomes a
    ``models.JSONField``.
    """

    # Depends on migration 0005 of the ``items`` app.
    dependencies = [('items', '0005_auto_20210708_1249')]

    # One identical primary-key alteration per model, in the original order,
    # followed by the JSONField change on ``semanticquery.query``.
    operations = [
        migrations.AlterField(
            model_name=model_name,
            name='id',
            field=models.BigAutoField(
                auto_created=True,
                primary_key=True,
                serialize=False,
                verbose_name='ID',
            ),
        )
        for model_name in ('editcounter', 'itemcounter', 'semanticquery')
    ] + [
        migrations.AlterField(
            model_name='semanticquery',
            name='query',
            field=models.JSONField(),
        ),
    ]
3 changes: 1 addition & 2 deletions backend/items/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from django.db import models
from django.contrib.postgres.fields import JSONField
from django.conf import settings

from rdf.baseclasses import BaseCounter
Expand All @@ -16,7 +15,7 @@ class EditCounter(BaseCounter):

class SemanticQuery(models.Model):
label = models.CharField(blank=True, max_length=100)
query = JSONField()
query = models.JSONField()
creator = models.ForeignKey(
settings.AUTH_USER_MODEL, null=True, on_delete=models.SET_NULL,
)
Expand Down
4 changes: 0 additions & 4 deletions backend/ontology/fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ def replace_prefix(graph_in, prefix_in, prefix_out):
replacing the prefix and then parsing the result into a new Graph.
Not efficient; do not use for large graphs.
"""
if type(prefix_in) == str:
prefix_in = prefix_in.encode()
if type(prefix_out) == str:
prefix_out = prefix_out.encode()
serialized = graph_in.serialize(format=REPARSE_FORMAT)
replaced = serialized.replace(prefix_in, prefix_out)
graph_out = Graph()
Expand Down
19 changes: 9 additions & 10 deletions backend/ontology/fixture_test.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
from rdflib import Graph, Literal

from rdf.ns import *
from rdf.ns import DCTYPES, RDF, RDFS, SCHEMA, Namespace
from rdf.utils import graph_from_triples
from rdflib import Literal

from .constants import *
from .fixture import *
from .constants import ONTOLOGY_NS, SOURCE_PREFIX
from .fixture import canonical_graph, replace_prefix


def graph_with_prefix(prefix):
my = Namespace(prefix)
# Following triples for testing purposes only. URIs might not exist.
return graph_from_triples((
( my.sandwich, RDF.type, SCHEMA.Food ),
( DCTYPES.Series, RDFS.domain, my.TVChannel ),
( SCHEMA.Cat, my.meow, Literal('loud') ),
(my.sandwich, RDF.type, SCHEMA.Food),
(DCTYPES.Series, RDFS.domain, my.TVChannel),
(SCHEMA.Cat, my.meow, Literal('loud')),
))


Expand All @@ -33,5 +32,5 @@ def test_canonical_graph():
g = canonical_graph()
assert len(g) > 0
text = g.serialize(format='n3')
assert ONTOLOGY_NS.encode() in text
assert SOURCE_PREFIX == ONTOLOGY_NS or SOURCE_PREFIX.encode() not in text
assert ONTOLOGY_NS in text
assert SOURCE_PREFIX == ONTOLOGY_NS or SOURCE_PREFIX not in text
9 changes: 4 additions & 5 deletions backend/ontology/rdf_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@

COLOR_SUPERCLASS_UPDATE = '''
INSERT {
?superclass schema:color ?colorcode .
?subclass schema:color ?colorcode .
?superclass ?color ?colorcode .
?subclass ?color ?colorcode .
}
WHERE {
OPTIONAL{ ?subclass ?related ?superclass . }
Expand Down Expand Up @@ -154,10 +154,9 @@ def check_superclass(subclass, superclass, input_graph=None):
def set_superclass_color(superclass, colorcode, input_graph=None):
context = input_graph if input_graph else graph()
bindings = {'superclass': superclass, 'colorcode': Literal(
colorcode), 'related': SKOS.related}
namespaces = {'schema': SCHEMA}
colorcode), 'related': SKOS.related, 'color': SCHEMA.color}
context.update(COLOR_SUPERCLASS_UPDATE,
initBindings=bindings, initNs=namespaces)
initBindings=bindings)


def annotations_need_verification(anno_class, input_graph=None):
Expand Down
6 changes: 3 additions & 3 deletions backend/rdf/baseclasses.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from django.db import models, DatabaseError
from django.db import models
from django.db.models import F
from django.db.transaction import atomic
# See https://docs.djangoproject.com/en/2.2/_modules/django/utils/decorators/
from django.utils.decorators import classproperty
# See https://docs.djangoproject.com/en/3.2/_modules/django/utils/functional
from django.utils.functional import classproperty
JeltevanBoheemen marked this conversation as resolved.
Show resolved Hide resolved


class BaseCounter(models.Model):
Expand Down
19 changes: 15 additions & 4 deletions backend/rdf/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from importlib import import_module

from items.conftest import TRIPLES as ITEMS
from pytest import fixture

from rdflib import Graph, ConjunctiveGraph, Literal, URIRef

from rdf.utils import graph_from_triples, prune_triples
from items.conftest import TRIPLES as ITEMS
from rdflib import ConjunctiveGraph, Graph, Literal, URIRef
JeltevanBoheemen marked this conversation as resolved.
Show resolved Hide resolved

from .ns import *

MAGIC_NODE = URIRef('http://hogwarts.edu/')
Expand Down Expand Up @@ -35,6 +34,7 @@ def triples():
def empty_graph():
return Graph()


@fixture
def items():
return ITEMS
Expand All @@ -44,6 +44,7 @@ def items():
def filled_graph(triples):
return graph_from_triples(triples)


@fixture
def filled_conjunctive_graph(items):
return graph_from_triples(items, ConjunctiveGraph)
Expand All @@ -64,3 +65,13 @@ def app_with_rdf_migrations():
def app_without_rdf_migrations():
from .test_apps import without_migrations
return without_migrations.__name__


@fixture
def prefixed_query():
    """Provide a SPARQL SELECT query that declares its own prefixes.

    The query text declares the `rdf`, `rdfs` and `schema` prefixes and
    selects every `?s ?p ?o` triple.
    """
    query = '''
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://www.schema.org/>
SELECT ?s ?p ?o WHERE { ?s ?p ?o }
'''
    return query
2 changes: 1 addition & 1 deletion backend/rdf/parsers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@

def test_rdflibparser(filled_graph):
parser = JSONLDParser()
serialized = filled_graph.serialize(format='json-ld')
serialized = filled_graph.serialize(format='json-ld').encode()
parsed = parser.parse(BytesIO(serialized))
assert len(parsed ^ filled_graph) == 0
59 changes: 52 additions & 7 deletions backend/rdf/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import random
import re

from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.contenttypes.models import ContentType
from rdflib import Graph, ConjunctiveGraph, Literal, URIRef
from rdflib_django.models import Store
from items import namespace as ITEM
from rdflib import ConjunctiveGraph, Graph, Literal, URIRef
from rdflib.plugins.stores.sparqlstore import SPARQLStore
from rdflib_django.models import Store

# Matches a SPARQL prefix declaration (`PREFIX label: <iri>`, keyword matched
# case-insensitively) and captures the prefix label without the colon.
PREFIX_PATTERN = re.compile(r'PREFIX\s+(\w+):\s*<\S+>', re.IGNORECASE)


def get_conjunctive_graph():
Expand All @@ -19,16 +23,19 @@ def prune_triples(graph, triples):
graph.remove(triple)


def prune_triples_cascade(graph, triples, graphs_applied_to = [], privileged_predicates = []):
def prune_triples_cascade(graph, triples, graphs_applied_to=[], privileged_predicates=[]):
"""
Recursively remove subjects in `triples` and all related resources from `graph`.
Specify which graphs qualify, i.e. from which triples will be deleted, in `graphs_applied_to`.
Optionally, skip items related via specific (privileged) predicates.
"""
for triple in triples:
prune_recursively(graph, triple[0], graphs_applied_to, privileged_predicates)
prune_recursively(
graph, triple[0], graphs_applied_to, privileged_predicates
)


def prune_recursively(graph, subject, graphs_applied_to = [], privileged_predicates = []):
def prune_recursively(graph, subject, graphs_applied_to=[], privileged_predicates=[]):
"""
Recursively remove subject and all related resources from `graph`.
Specify which graphs qualify, i.e. from which triples will be deleted, in `graphs_applied_to`.
Expand All @@ -38,7 +45,8 @@ def prune_recursively(graph, subject, graphs_applied_to = [], privileged_predica

for s, p, o, c in related_by_subject:
if isinstance(o, URIRef) and o != s and p not in privileged_predicates and c in graphs_applied_to:
prune_recursively(graph, o, graphs_applied_to, privileged_predicates)
prune_recursively(graph, o, graphs_applied_to,
privileged_predicates)

prune_triples(graph, related_by_subject)

Expand All @@ -59,7 +67,7 @@ def graph_from_triples(triples, ctor=Graph):
def sample_graph(graph, subjects, request):
""" Return a random sample from a graph, optionally filtering with a list containing [predicate, object]. """
n_results = int(request.GET.get('n_results'))
if len(subjects)>n_results:
if len(subjects) > n_results:
sampled_subjects = random.sample(list(subjects), n_results)
else:
sampled_subjects = subjects
Expand All @@ -69,6 +77,7 @@ def sample_graph(graph, subjects, request):
[output.add(s) for s in suggestions]
return output


def traverse_forward(full_graph, fringe, plys):
"""
Traverse `full_graph` by object `plys` times, starting from `fringe`.
Expand Down Expand Up @@ -119,3 +128,39 @@ def traverse_backward(full_graph, fringe, plys):
subjects = set(fringe.subjects()) - visited_subjects
plys -= 1
return result


def patched_inject_prefixes(self, query, extra_bindings):
    """Monkeypatch for SPARQLStore prefix injection.

    Prepend ``PREFIX`` declarations to `query` without declaring any prefix
    twice. Precedence, from highest to lowest:

    1. `extra_bindings` (the namespaces provided through ``initNs``);
    2. prefixes already declared in the text of `query`;
    3. the store's own ``self.nsBindings``.

    A better implementation is possibly available, e.g. using rdflib's
    query parser to extract the prefixes.

    :param query: SPARQL query text, possibly containing PREFIX declarations.
    :param extra_bindings: mapping of prefix label to namespace IRI.
    :returns: the query with the remaining bindings prepended, or the
        (possibly stripped) query itself when there is nothing to prepend.
    """
    query_prefixes = set(re.findall(PREFIX_PATTERN, query))

    # Store bindings only apply to prefixes the query does not declare itself.
    # A dict (rather than a set of pairs) lets an extra binding replace a
    # store binding for the same prefix instead of emitting both declarations
    # in arbitrary order.
    bindings = {
        prefix: namespace
        for prefix, namespace in self.nsBindings.items()
        if prefix not in query_prefixes
    }
    bindings.update(extra_bindings)

    # Drop the query's own declaration of every prefix that is overridden by
    # an extra binding, so that the prefix ends up declared exactly once.
    for prefix in extra_bindings:
        if prefix in query_prefixes:
            # `[^<>]*` stops at the IRI's closing bracket; a greedy `.+`
            # would also swallow any later `<...>` term on the same line.
            declaration = re.compile(
                fr'PREFIX\s+{re.escape(prefix)}:\s*<[^<>]*>', re.IGNORECASE)
            query = declaration.sub('', query)

    if not bindings:
        return query
    return "\n".join(
        [
            "\n".join(
                "PREFIX %s: <%s>" % (prefix, namespace)
                for prefix, namespace in bindings.items()
            ),
            "",  # separate ns_bindings from query with an empty line
            query,
        ]
    )


# Replace SPARQLStore._inject_prefixes with patched_inject_prefixes on the
# class itself; the assignment runs when this module is imported.
SPARQLStore._inject_prefixes = patched_inject_prefixes
35 changes: 29 additions & 6 deletions backend/rdf/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from .ns import *
from .utils import *
from items import namespace as ITEM
import re


@pytest.fixture
def other_triples():
Expand All @@ -23,18 +25,22 @@ def test_prune_triples(filled_graph, other_triples):


def test_prune_triples_cascade(filled_conjunctive_graph):
anno = ( ITEM['7'], RDF.type, OA.Annotation )
anno = (ITEM['7'], RDF.type, OA.Annotation)
# get our item graph from the conjunctive graph
privileged_graph = next(filled_conjunctive_graph.contexts()) # victim graph :D
prune_triples_cascade(filled_conjunctive_graph, (anno,), [privileged_graph])
privileged_graph = next(
filled_conjunctive_graph.contexts()) # victim graph :D
prune_triples_cascade(filled_conjunctive_graph,
(anno,), [privileged_graph])
assert len(filled_conjunctive_graph) == 0


def test_prune_triples_cascade_privileged(filled_conjunctive_graph):
anno = ( ITEM['7'], RDF.type, OA.Annotation )
anno = (ITEM['7'], RDF.type, OA.Annotation)
# get our item graph from the conjunctive graph
privileged_graph = next(filled_conjunctive_graph.contexts()) # victim graph :D
prune_triples_cascade(filled_conjunctive_graph, (anno,), [privileged_graph], [OA.hasBody])
privileged_graph = next(
filled_conjunctive_graph.contexts()) # victim graph :D
prune_triples_cascade(filled_conjunctive_graph, (anno,), [
privileged_graph], [OA.hasBody])
assert len(filled_conjunctive_graph) == 14


Expand Down Expand Up @@ -88,3 +94,20 @@ def test_traverse_backward(filled_graph, other_triples):
new_size = len(result)
assert new_size > size
size = new_size


def test_prefix_injection(sparqlstore, prefixed_query):
    """Check that prefix injection declares every expected prefix exactly once.

    First the query is injected without extra bindings, so its own
    declarations must survive. Then `rdf` and `schema` are overridden through
    `extra_bindings`, so the query's original declarations of those two
    prefixes must be gone and replaced by the overriding IRIs.
    """
    expected_prefixes = ['rdf', 'rdfs', 'schema']

    res = sparqlstore._inject_prefixes(prefixed_query, {})
    declared = re.findall(PREFIX_PATTERN, res)
    # Exactly once: a duplicated declaration would still pass a mere
    # membership check.
    assert all(declared.count(prefix) == 1 for prefix in expected_prefixes)
    assert 'PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>' in res

    res = sparqlstore._inject_prefixes(
        prefixed_query,
        extra_bindings={
            'rdf': 'https://cat-bounce.com',
            'schema': 'http://randomcolour.com/',
        },
    )
    declared = re.findall(PREFIX_PATTERN, res)
    assert all(declared.count(prefix) == 1 for prefix in expected_prefixes)
    assert 'PREFIX rdf: <https://cat-bounce.com>' in res
    assert 'PREFIX schema: <http://randomcolour.com/>' in res
    # The overridden declarations must have been removed from the query text.
    assert 'PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>' not in res
    assert 'PREFIX schema: <http://www.schema.org/>' not in res
Loading