diff --git a/.gitignore b/.gitignore index d714cfa5..070df142 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__ .pytest_cache .idea tmp/ +tests/outputs/* diff --git a/poetry.lock b/poetry.lock index d56283f2..d6268ff1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -55,6 +55,32 @@ python-versions = ">=3.6" [package.dependencies] pytz = ">=2015.7" +[[package]] +name = "bioregistry" +version = "0.5.49" +description = "Integrated registry of biological databases and nomenclatures" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +click = "*" +more-click = "*" +pydantic = "*" +pystow = ">=0.1.13" +requests = "*" +tqdm = "*" + +[package.extras] +align = ["pyyaml", "beautifulsoup4", "tabulate", "defusedxml", "class-resolver"] +charts = ["matplotlib", "matplotlib-venn", "seaborn", "pandas"] +docs = ["sphinx", "sphinx-rtd-theme", "sphinx-click", "sphinx-autodoc-typehints", "sphinx-automodapi", "autodoc-pydantic"] +export = ["pyyaml", "rdflib", "rdflib-jsonld", "ndex2"] +gha = ["more-itertools"] +health = ["click-default-group", "pandas", "tabulate"] +tests = ["coverage", "pytest", "more-itertools"] +web = ["pyyaml", "rdflib", "rdflib-jsonld", "flask", "flasgger", "bootstrap-flask (<=2.0.0)", "markdown"] + [[package]] name = "certifi" version = "2022.6.15" @@ -228,6 +254,21 @@ docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] perf = ["ipython"] testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] +[[package]] +name = "importlib-resources" +version = "5.9.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] + [[package]] name = "iniconfig" version = "1.1.1" @@ -337,6 +378,7 @@ python-versions = ">=3.7" [package.dependencies] attrs = ">=17.4.0" +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" [package.extras] @@ -552,6 +594,17 @@ category = "dev" optional = false python-versions = ">=3.6" +[[package]] +name = "more-click" +version = "0.1.1" +description = "More click." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = "*" + [[package]] name = "myst-parser" version = "0.18.0" @@ -652,7 +705,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" name = "pydantic" version = "1.9.1" description = "Data validation and settings management using python type hints" -category = "dev" +category = "main" optional = false python-versions = ">=3.6.1" @@ -748,6 +801,27 @@ pyjsg = ">=0.11.10" rdflib-shim = "*" shexjsg = ">=0.8.1" +[[package]] +name = "pystow" +version = "0.4.6" +description = "Easily pick a place to store data for your python package." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = "*" +requests = "*" +tqdm = "*" + +[package.extras] +aws = ["boto3"] +docs = ["sphinx", "sphinx-rtd-theme", "sphinx-click", "sphinx-autodoc-typehints", "sphinx-automodapi"] +pandas = ["pandas"] +rdf = ["rdflib"] +tests = ["coverage", "pytest", "requests-file"] +xml = ["lxml"] + [[package]] name = "pytest" version = "7.1.2" @@ -1101,11 +1175,28 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "tqdm" +version = "4.64.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typing-extensions" version = "4.2.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" @@ -1145,7 +1236,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" name = "zipp" version = "3.8.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" +category = "main" optional = false python-versions = ">=3.7" @@ -1158,8 +1249,8 @@ docs = [] [metadata] lock-version = "1.1" -python-versions = "^3.9" -content-hash = "ea35eaca451afb882c41edaad240673851a54e656168b68cbb97eefd82d7f233" +python-versions = "^3.8" +content-hash = "9079b4450180f11ffe7b50db0dab7403599f7f733b0028936c8c88c2c87272a0" [metadata.files] alabaster = [ @@ -1185,6 +1276,7 @@ babel = [ {file = "Babel-2.10.3-py3-none-any.whl", hash = "sha256:ff56f4892c1c4bf0d814575ea23471c230d544203c7748e8c68f0089478d48eb"}, {file = "Babel-2.10.3.tar.gz", hash = "sha256:7614553711ee97490f732126dc077f8d0ae084ebc6a96e23db1482afabdb2c51"}, ] +bioregistry = [] certifi = [ {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, @@ -1305,6 +1397,7 @@ importlib-metadata = [ {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"}, ] +importlib-resources = [] iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, @@ -1436,6 +1529,10 @@ mkdocs-material-extensions = [ {file = "mkdocs-material-extensions-1.0.3.tar.gz", hash = "sha256:bfd24dfdef7b41c312ede42648f9eb83476ea168ec163b613f9abd12bbfddba2"}, {file = "mkdocs_material_extensions-1.0.3-py3-none-any.whl", hash = "sha256:a82b70e533ce060b2a5d9eb2bc2e1be201cf61f901f93704b4acf6e3d5983a44"}, ] +more-click = [ + {file = "more_click-0.1.1-py3-none-any.whl", hash = "sha256:ff68c7e874fd409ce501903be3177363499aa9c2662607a3b66568f766dea527"}, + {file = "more_click-0.1.1.tar.gz", hash = "sha256:277c64767a6a9c6625ec6bc3e1241012867f6953b2295b2a1e8eeddec586eb53"}, +] myst-parser = [ {file = "myst-parser-0.18.0.tar.gz", hash = "sha256:739a4d96773a8e55a2cacd3941ce46a446ee23dcd6b37e06f73f551ad7821d86"}, {file = "myst_parser-0.18.0-py3-none-any.whl", hash = "sha256:4965e51918837c13bf1c6f6fe2c6bddddf193148360fbdaefe743a4981358f6a"}, @@ -1552,6 +1649,7 @@ pyshexc = [ {file = "PyShExC-0.9.1-py2.py3-none-any.whl", hash = "sha256:efc55ed5cb2453e9df569b03e282505e96bb06597934288f3b23dd980ef10028"}, {file = "PyShExC-0.9.1.tar.gz", hash = "sha256:35a9975d4b9afeb20ef710fb6680871756381d0c39fbb5470b3b506581a304d3"}, ] +pystow = [] pytest = [ {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, @@ -1744,6 +1842,10 @@ tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +tqdm = [ + {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"}, + {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"}, +] typing-extensions = [ {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, diff --git a/pyproject.toml b/pyproject.toml index 65c5f1f0..33dc9968 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ readme = "README.md" python = "^3.8" linkml-runtime = "^1.1.24" lark = "^1.1.2" +bioregistry = "^0.5.49" [tool.poetry.dev-dependencies] linkml = "^1.2.15" diff --git a/src/kgcl_schema/grammar/parser.py b/src/kgcl_schema/grammar/parser.py index ca136d21..307a7cdc 100644 --- a/src/kgcl_schema/grammar/parser.py +++ b/src/kgcl_schema/grammar/parser.py @@ -1,20 +1,24 @@ """KGCL parser.""" import logging +import re import sys from pathlib import Path from typing import List import click -from kgcl_schema.utils import to_json, to_rdf, to_yaml -from lark import Lark, Token - -from kgcl_schema.datamodel.kgcl import (ClassCreation, EdgeCreation, EdgeDeletion, - NewSynonym, NodeAnnotationChange, NodeCreation, +from bioregistry import parse_iri, get_preferred_prefix, curie_to_str +from kgcl_schema.datamodel.kgcl import (Change, ClassCreation, EdgeCreation, + EdgeDeletion, NewSynonym, + NodeAnnotationChange, NodeCreation, NodeDeepening, NodeDeletion, NodeMove, - NodeObsoletion, NodeRename, NodeShallowing, - NodeUnobsoletion, PlaceUnder, PredicateChange, - RemovedNodeFromSubset, RemoveUnder, Change, Session) + NodeObsoletion, NodeRename, + NodeShallowing, NodeUnobsoletion, + PlaceUnder, PredicateChange, + RemovedNodeFromSubset, RemoveUnder, + Session) from kgcl_schema.datamodel.ontology_model import Edge +from kgcl_schema.utils import to_json, to_rdf, to_yaml +from lark import Lark, Token def id_generator(): @@ -54,6 +58,16 @@ def parse_statement(input: str) -> Change: Return an instantiated dataclass object from model.kgcl_schema. """ + regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' + uri_list = re.findall(regex, input) + if uri_list: + # curie = curie_from_iri(uri[0].replace("<", "").replace(">","")) + for _, uri in enumerate(uri_list): + pref, i = parse_iri(uri) + pref = get_preferred_prefix(pref) + curie = curie_to_str(pref, i) + input = input.replace(uri, curie) + tree = kgcl_parser.parse(input) id = "kgcl_change_id_" + str(next(id_gen)) diff --git a/src/kgcl_schema/grammar/render_operations.py b/src/kgcl_schema/grammar/render_operations.py index 996c1f7d..ce5b9944 100644 --- a/src/kgcl_schema/grammar/render_operations.py +++ b/src/kgcl_schema/grammar/render_operations.py @@ -20,13 +20,13 @@ def render_entity(entity, rdf_type): if rdf_type is None: return entity elif rdf_type == "uri": - return "<" + entity + ">" + return entity elif rdf_type == "label": if "'" in entity: # TODO: replacing quotes with backticks # is only a temporary workaround entity = entity.replace("'", "`") - return "'" + entity + "'" + return entity elif rdf_type == "literal": # TODO: test this if '"' not in entity: diff --git a/tests/cases.py b/tests/cases.py index edfec7d6..3fb2b5ab 100644 --- a/tests/cases.py +++ b/tests/cases.py @@ -6,21 +6,21 @@ TODO_TOKEN = "TODO" PART_OF = "BFO:0000050" -PART_OF_URI = "" +PART_OF_URI = "http://purl.obolibrary.org/obo/BFO_0000050" IS_A = "rdfs:subClassOf" -IS_A_URI = "" +IS_A_URI = "http://www.w3.org/2000/01/rdf-schema#subClassOf" NUCLEUS = "GO:0005634" -NUCLEUS_URI = "" +NUCLEUS_URI = "http://purl.obolibrary.org/obo/GO_0005634" MITOCHONDRION = "GO:0005739" -MITOCHONDRION_URI = "" +MITOCHONDRION_URI = "http://purl.obolibrary.org/obo/GO_0005739" IMBO = "GO:0043231" -IMBO_URI = "" +IMBO_URI = "http://purl.obolibrary.org/obo/GO_0043231" NUCLEAR_ENVELOPE = "GO:0005635" -NUCLEAR_ENVELOPE_URI = "" +NUCLEAR_ENVELOPE_URI = "http://purl.obolibrary.org/obo/GO_0005635" NEW_TERM = "GO:9999999" -NEW_TERM_URI = "" +NEW_TERM_URI = "http://purl.obolibrary.org/obo/GO_9999999" RESPONSE_TO_UV = "GO:0009411" -RESPONSE_TO_UV_URI = "" +RESPONSE_TO_UV_URI = "http://purl.obolibrary.org/obo/GO_0009411" UID = "CHANGE:001" TERM = "GO:123" @@ -124,10 +124,10 @@ #f"create node {NEW_TERM_URI} 'foo'", TODO_TOKEN, NodeCreation(id=UID, - node_id=NEW_TERM_URI, ## TODO: remove this - about_node=NEW_TERM_URI, + node_id=NEW_TERM, ## TODO: remove this + about_node=NEW_TERM, name="'foo'", - about_node_representation='uri'), + about_node_representation='curie'), None ), (