Skip to content

Commit

Permalink
Merge pull request #6 from INCATools/add-bioregistry
Browse files Browse the repository at this point in the history
Convert IRIs to CURIEs using `bioregistry` while parsing a KGCL command.
  • Loading branch information
hrshdhgd authored Aug 16, 2022
2 parents 44a20f0 + 44c8099 commit 87901f1
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
.pytest_cache
.idea
tmp/
tests/outputs/*
112 changes: 107 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ readme = "README.md"
python = "^3.8"
linkml-runtime = "^1.1.24"
lark = "^1.1.2"
bioregistry = "^0.5.49"

[tool.poetry.dev-dependencies]
linkml = "^1.2.15"
Expand Down
30 changes: 22 additions & 8 deletions src/kgcl_schema/grammar/parser.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
"""KGCL parser."""
import logging
import re
import sys
from pathlib import Path
from typing import List

import click
from kgcl_schema.utils import to_json, to_rdf, to_yaml
from lark import Lark, Token

from kgcl_schema.datamodel.kgcl import (ClassCreation, EdgeCreation, EdgeDeletion,
NewSynonym, NodeAnnotationChange, NodeCreation,
from bioregistry import parse_iri, get_preferred_prefix, curie_to_str
from kgcl_schema.datamodel.kgcl import (Change, ClassCreation, EdgeCreation,
EdgeDeletion, NewSynonym,
NodeAnnotationChange, NodeCreation,
NodeDeepening, NodeDeletion, NodeMove,
NodeObsoletion, NodeRename, NodeShallowing,
NodeUnobsoletion, PlaceUnder, PredicateChange,
RemovedNodeFromSubset, RemoveUnder, Change, Session)
NodeObsoletion, NodeRename,
NodeShallowing, NodeUnobsoletion,
PlaceUnder, PredicateChange,
RemovedNodeFromSubset, RemoveUnder,
Session)
from kgcl_schema.datamodel.ontology_model import Edge
from kgcl_schema.utils import to_json, to_rdf, to_yaml
from lark import Lark, Token


def id_generator():
Expand Down Expand Up @@ -54,6 +58,16 @@ def parse_statement(input: str) -> Change:
Return an instantiated dataclass object from model.kgcl_schema.
"""
regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
uri_list = re.findall(regex, input)
if uri_list:
# curie = curie_from_iri(uri[0].replace("<", "").replace(">",""))
for _, uri in enumerate(uri_list):
pref, i = parse_iri(uri)
pref = get_preferred_prefix(pref)
curie = curie_to_str(pref, i)
input = input.replace(uri, curie)

tree = kgcl_parser.parse(input)
id = "kgcl_change_id_" + str(next(id_gen))

Expand Down
4 changes: 2 additions & 2 deletions src/kgcl_schema/grammar/render_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ def render_entity(entity, rdf_type):
if rdf_type is None:
return entity
elif rdf_type == "uri":
return "<" + entity + ">"
return entity
elif rdf_type == "label":
if "'" in entity:
# TODO: replacing quotes with backticks
# is only a temporary workaround
entity = entity.replace("'", "`")
return "'" + entity + "'"
return entity
elif rdf_type == "literal":
# TODO: test this
if '"' not in entity:
Expand Down
22 changes: 11 additions & 11 deletions tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@

TODO_TOKEN = "TODO"
PART_OF = "BFO:0000050"
PART_OF_URI = "<http://purl.obolibrary.org/obo/BFO_0000050>"
PART_OF_URI = "http://purl.obolibrary.org/obo/BFO_0000050"
IS_A = "rdfs:subClassOf"
IS_A_URI = "<http://www.w3.org/2000/01/rdf-schema#subClassOf>"
IS_A_URI = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
NUCLEUS = "GO:0005634"
NUCLEUS_URI = "<http://purl.obolibrary.org/obo/GO_0005634>"
NUCLEUS_URI = "http://purl.obolibrary.org/obo/GO_0005634"
MITOCHONDRION = "GO:0005739"
MITOCHONDRION_URI = "<http://purl.obolibrary.org/obo/GO_0005739>"
MITOCHONDRION_URI = "http://purl.obolibrary.org/obo/GO_0005739"
IMBO = "GO:0043231"
IMBO_URI = "<http://purl.obolibrary.org/obo/GO_0043231>"
IMBO_URI = "http://purl.obolibrary.org/obo/GO_0043231"
NUCLEAR_ENVELOPE = "GO:0005635"
NUCLEAR_ENVELOPE_URI = "<http://purl.obolibrary.org/obo/GO_0005635>"
NUCLEAR_ENVELOPE_URI = "http://purl.obolibrary.org/obo/GO_0005635"
NEW_TERM = "GO:9999999"
NEW_TERM_URI = "<http://purl.obolibrary.org/obo/GO_9999999>"
NEW_TERM_URI = "http://purl.obolibrary.org/obo/GO_9999999"
RESPONSE_TO_UV = "GO:0009411"
RESPONSE_TO_UV_URI = "<http://purl.obolibrary.org/obo/GO_0009411>"
RESPONSE_TO_UV_URI = "http://purl.obolibrary.org/obo/GO_0009411"

UID = "CHANGE:001"
TERM = "GO:123"
Expand Down Expand Up @@ -124,10 +124,10 @@
#f"create node {NEW_TERM_URI} 'foo'",
TODO_TOKEN,
NodeCreation(id=UID,
node_id=NEW_TERM_URI, ## TODO: remove this
about_node=NEW_TERM_URI,
node_id=NEW_TERM, ## TODO: remove this
about_node=NEW_TERM,
name="'foo'",
about_node_representation='uri'),
about_node_representation='curie'),
None
),
(
Expand Down

0 comments on commit 87901f1

Please sign in to comment.