Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TSV writer and update other writers' docs #107

Merged
merged 7 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions docs/source/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,38 @@ project.
>>> converter = curies.get_bioregistry_converter()
>>> slim_converter = converter.get_subconverter(prefixes)

Writing a Context
-----------------
After loading and modifying a context, there are several functions for writing
a context to a file:

- :func:`curies.write_extended_prefix_map`
- :func:`curies.write_jsonld_context`
- :func:`curies.write_shacl`
- :func:`curies.write_tsv`

Here's a self-contained example of how this works:

.. code-block:: python

import curies
converter = curies.load_prefix_map({
"CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
})
curies.write_shacl(converter, "example_shacl.ttl")

which outputs the following file:

.. code-block::

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[
sh:declare
[ sh:prefix "CHEBI" ; sh:namespace "http://purl.obolibrary.org/obo/CHEBI_"^^xsd:anyURI ]
] .

Faultless handling of overlapping URI prefixes
----------------------------------------------
Most implementations of URI parsing iterate through the CURIE prefix/URI prefix pairs
Expand Down
2 changes: 2 additions & 0 deletions src/curies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
write_extended_prefix_map,
write_jsonld_context,
write_shacl,
write_tsv,
)
from .discovery import discover, discover_from_rdf
from .reconciliation import remap_curie_prefixes, remap_uri_prefixes, rewire
Expand Down Expand Up @@ -53,6 +54,7 @@
"write_extended_prefix_map",
"write_jsonld_context",
"write_shacl",
"write_tsv",
# sources
"get_obo_converter",
"get_prefixcommons_converter",
Expand Down
136 changes: 122 additions & 14 deletions src/curies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"write_extended_prefix_map",
"write_jsonld_context",
"write_shacl",
"write_tsv",
]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -2129,29 +2130,83 @@ def _ensure_path(path: Union[str, Path]) -> Path:
return path


def _get_jsonld_context(
    converter: Converter, *, expand: bool = False, include_synonyms: bool = False
) -> Dict[str, Any]:
    """Build a JSON-LD context document from the records in a converter.

    :param converter: The converter whose records are exported
    :param expand: Forwarded to :func:`_get_expanded_term` when building the
        term for each record
    :param include_synonyms: If true, every CURIE prefix synonym of a record
        is also added as a key pointing to the same term as the record's
        primary prefix
    :returns: A dictionary with a single ``@context`` key whose value maps
        CURIE prefixes to their terms
    """
    terms: Dict[str, Any] = {}
    for record in converter.records:
        expanded = _get_expanded_term(record, expand=expand)
        # The primary prefix always comes first, followed by any synonyms
        keys = [record.prefix]
        if include_synonyms:
            keys.extend(record.prefix_synonyms)
        for key in keys:
            terms[key] = expanded
    return {"@context": terms}


def write_jsonld_context(
    converter: Converter,
    path: Union[str, Path],
    *,
    include_synonyms: bool = False,
    expand: bool = False,
) -> None:
    """Write the converter's bijective map as a JSON-LD context to a file.

    :param converter: The converter to export
    :param path: The path to a file to write to
    :param include_synonyms: If true, includes CURIE prefix synonyms.
        URI prefix synonyms are not output.
    :param expand: If False, output a dictionary-like ``@context`` element.
        If True, use ``@prefix`` and ``@id`` as keys for the CURIE prefix
        and URI prefix, respectively, to maximize compatibility.

    The following example shows writing a JSON-LD context:

    .. code-block:: python

        import curies
        converter = curies.load_prefix_map({
            "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
        })
        curies.write_jsonld_context(converter, "example_context.json")

    .. code-block:: json

        {
            "@context": {
                "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_"
            }
        }

    Because some implementations of JSON-LD do not like URI prefixes that end
    with an underscore ``_``, we can use the ``expand`` keyword to turn on more
    verbose JSON-LD context output that contains explicit ``@prefix`` and
    ``@id`` annotations:

    .. code-block:: python

        import curies
        converter = curies.load_prefix_map({
            "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
        })
        curies.write_jsonld_context(converter, "example_context.json", expand=True)

    .. code-block:: json

        {
            "@context": {
                "CHEBI": {
                    "@id": "http://purl.obolibrary.org/obo/CHEBI_",
                    "@prefix": true
                }
            }
        }
    """
    path = _ensure_path(path)
    # Building the context is delegated to the shared helper so the document
    # is assembled in one place and serialized exactly once.
    obj = _get_jsonld_context(converter, include_synonyms=include_synonyms, expand=expand)
    with path.open("w") as file:
        json.dump(obj, file, indent=4, sort_keys=True)


def _get_expanded_term(record: Record, *, expand: bool) -> Union[str, Dict[str, Any]]:
Expand Down Expand Up @@ -2183,6 +2238,24 @@ def write_shacl(
URI prefix synonyms are not output.

.. seealso:: https://www.w3.org/TR/shacl/#sparql-prefixes

.. code-block:: python

import curies
converter = curies.load_prefix_map({
"CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
})
curies.write_shacl(converter, "example_shacl.ttl")

.. code-block::

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[
sh:declare
[ sh:prefix "CHEBI" ; sh:namespace "http://purl.obolibrary.org/obo/CHEBI_"^^xsd:anyURI ]
] .
"""
text = dedent(
"""\
Expand All @@ -2207,6 +2280,41 @@ def write_shacl(
path.write_text(text.format(entries=",\n".join(lines)))


def write_tsv(
    converter: Converter, path: Union[str, Path], *, header: Tuple[str, str] = ("prefix", "base")
) -> None:
    """Write a simple prefix map TSV (tab-separated values) file.

    One header row is written, followed by one row per record in the
    converter containing its CURIE prefix and its URI prefix.

    :param converter: The converter to export
    :param path: The path to a file to write to
    :param header: A 2-tuple of strings representing the header used in the file,
        where the first element is the label for CURIE prefixes and the second
        element is the label for URI prefixes

    .. code-block:: python

        import curies
        converter = curies.load_prefix_map({
            "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
        })
        curies.write_tsv(converter, "example_context.tsv")

    .. code-block::

        prefix  base
        CHEBI   http://purl.obolibrary.org/obo/CHEBI_
    """
    import csv

    path = _ensure_path(path)

    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" on write would emit "\r\r\n" row endings.
    with path.open("w", newline="") as file:
        writer = csv.writer(file, delimiter="\t")
        writer.writerow(header)
        writer.writerows((record.prefix, record.uri_prefix) for record in converter.records)


def _get_shacl_line(prefix: str, uri_prefix: str, pattern: Optional[str] = None) -> str:
line = f' [ sh:prefix "{prefix}" ; sh:namespace "{uri_prefix}"^^xsd:anyURI '
if pattern:
Expand Down
Loading