Skip to content

Commit

Permalink
rdf author fields serialisation fix
Browse files Browse the repository at this point in the history
  • Loading branch information
hkir-dev committed Oct 4, 2024
1 parent c4fb95a commit fa25649
Show file tree
Hide file tree
Showing 19 changed files with 901,268 additions and 700 deletions.
306 changes: 83 additions & 223 deletions build/BICAN_schema.yaml

Large diffs are not rendered by default.

306 changes: 83 additions & 223 deletions build/CAP_schema.yaml

Large diffs are not rendered by default.

306 changes: 83 additions & 223 deletions build/general_schema.yaml

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions src/cell_annotation_schema/curie_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import json
from pathlib import Path

from importlib import resources
from cell_annotation_schema import resource


class CurieToIriConverter:
    """Expand CURIEs (``prefix:reference``) into IRIs.

    The prefix table is the ``@context`` object of a JSON-LD file that is
    loaded once, when the converter is constructed.
    """

    DEFAULT_JSONLD_FILE_NAME = 'prefixes.jsonld'

    def __init__(self, jsonld_file=DEFAULT_JSONLD_FILE_NAME):
        """Load the prefix table.

        Args:
            jsonld_file: Path to a JSON-LD file, or the name of a file shipped
                in the ``resource`` package. Defaults to
                ``DEFAULT_JSONLD_FILE_NAME``.
        """
        self.jsonld_file = jsonld_file
        self.prefix_mappings = self.load_prefix_mappings(self.jsonld_file)

    @staticmethod
    def load_prefix_mappings(jsonld_file_name):
        """Read a JSON-LD file and return its ``@context`` object.

        Lookup order:
          1. ``jsonld_file_name`` taken as a filesystem path;
          2. a file of that name inside the ``resource`` package, located
             through ``importlib.resources``;
          3. ``<directory of this module>/resource/<jsonld_file_name>``.

        Args:
            jsonld_file_name: Path or bare file name of the JSON-LD file.

        Returns:
            The value stored under ``@context``, or an empty dict when the
            document has no such key.

        Raises:
            FileNotFoundError: If none of the three locations holds the file.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        if os.path.exists(jsonld_file_name):
            # A path supplied by the user wins; the packaged resource is not
            # even looked up in this case.
            with open(jsonld_file_name, 'r', encoding='utf-8') as file:
                prefixes = json.load(file)
            return prefixes.get('@context', {})

        prefix_resource_file = resources.files(resource) / jsonld_file_name
        # Use the Traversable API rather than os.path/Path: a package resource
        # is not guaranteed to be a real file on disk (e.g. zipped installs).
        if prefix_resource_file.is_file():
            prefixes = json.loads(prefix_resource_file.read_text(encoding='utf-8'))
        else:
            # Last resort: the "resource" folder next to this module.
            resource_dir = os.path.join(os.path.dirname(__file__), "./resource")
            prefixes_path = os.path.join(resource_dir, jsonld_file_name)
            prefixes = json.loads(Path(prefixes_path).read_text(encoding='utf-8'))
        return prefixes.get('@context', {})

    def curie_to_iri(self, curie):
        """Expand ``curie`` into an IRI.

        Only the first ``:`` separates prefix from reference, so any further
        colons stay in the reference.

        Args:
            curie: String of the form ``prefix:reference``.

        Returns:
            The namespace registered for the prefix followed by the reference.

        Raises:
            ValueError: If ``curie`` contains no ``:`` or its prefix is not in
                the loaded prefix table.
        """
        prefix, separator, reference = curie.partition(':')
        if not separator:
            # Previously surfaced as an opaque tuple-unpacking ValueError.
            raise ValueError(
                f"'{curie}' is not a valid CURIE: expected the form 'prefix:reference'."
            )
        if prefix not in self.prefix_mappings:
            raise ValueError(f"Prefix '{prefix}' not found in prefix mappings.")
        return self.prefix_mappings[prefix] + reference
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Auto generated from None by pythongen.py version: 0.0.1
# Generation date: 2024-09-23T16:32:47
# Generation date: 2024-10-03T17:13:03
# Schema: General_Cell_Annotation_Open_Standard
#
# id: https://cellular-semantics.sanger.ac.uk/ontology/CAS
Expand Down Expand Up @@ -33,6 +33,7 @@
# Namespaces
CAS = CurieNamespace('CAS', 'https://cellular-semantics.sanger.ac.uk/ontology/CAS/')
CL = CurieNamespace('CL', 'http://purl.obolibrary.org/obo/CL_')
CELLXGENE_DATASET = CurieNamespace('CellXGene_dataset', 'https://cellxgene.cziscience.com/datasets/')
IAO = CurieNamespace('IAO', 'http://purl.obolibrary.org/obo/IAO_')
PCL = CurieNamespace('PCL', 'http://purl.obolibrary.org/obo/PCL_')
RO = CurieNamespace('RO', 'http://purl.obolibrary.org/obo/RO_')
Expand Down Expand Up @@ -513,7 +514,7 @@ class Taxonomy(YAMLRoot):
title: str = None
author_name: str = None
labelsets: Union[Union[dict, Labelset], List[Union[dict, Labelset]]] = None
matrix_file_id: Optional[str] = None
matrix_file_id: Optional[Union[str, URIorCURIE]] = None
description: Optional[str] = None
cellannotation_schema_version: Optional[str] = None
cellannotation_timestamp: Optional[str] = None
Expand All @@ -540,8 +541,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self.labelsets = [self.labelsets] if self.labelsets is not None else []
self.labelsets = [v if isinstance(v, Labelset) else Labelset(**as_dict(v)) for v in self.labelsets]

if self.matrix_file_id is not None and not isinstance(self.matrix_file_id, str):
self.matrix_file_id = str(self.matrix_file_id)
if self.matrix_file_id is not None and not isinstance(self.matrix_file_id, URIorCURIE):
self.matrix_file_id = URIorCURIE(self.matrix_file_id)

if self.description is not None and not isinstance(self.description, str):
self.description = str(self.description)
Expand Down Expand Up @@ -14356,7 +14357,7 @@ class slots:
model_uri=CELL_ANNOTATION_SCHEMA.author_annotation_fields, domain=None, range=Optional[Union[dict, Any]])

slots.matrix_file_id = Slot(uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, name="matrix_file_id", curie=CELL_ANNOTATION_SCHEMA.curie('matrix_file_id'),
model_uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, domain=None, range=Optional[str])
model_uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, domain=None, range=Optional[Union[str, URIorCURIE]])

slots.title = Slot(uri=CELL_ANNOTATION_SCHEMA.title, name="title", curie=CELL_ANNOTATION_SCHEMA.curie('title'),
model_uri=CELL_ANNOTATION_SCHEMA.title, domain=None, range=str)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Auto generated from None by pythongen.py version: 0.0.1
# Generation date: 2024-09-23T16:32:48
# Generation date: 2024-10-03T17:13:03
# Schema: General_Cell_Annotation_Open_Standard
#
# id: https://cellular-semantics.sanger.ac.uk/ontology/CAS
Expand Down Expand Up @@ -33,6 +33,7 @@
# Namespaces
CAS = CurieNamespace('CAS', 'https://cellular-semantics.sanger.ac.uk/ontology/CAS/')
CL = CurieNamespace('CL', 'http://purl.obolibrary.org/obo/CL_')
CELLXGENE_DATASET = CurieNamespace('CellXGene_dataset', 'https://cellxgene.cziscience.com/datasets/')
IAO = CurieNamespace('IAO', 'http://purl.obolibrary.org/obo/IAO_')
PCL = CurieNamespace('PCL', 'http://purl.obolibrary.org/obo/PCL_')
RO = CurieNamespace('RO', 'http://purl.obolibrary.org/obo/RO_')
Expand Down Expand Up @@ -440,7 +441,7 @@ class Taxonomy(YAMLRoot):
title: str = None
author_name: str = None
labelsets: Union[Union[dict, Labelset], List[Union[dict, Labelset]]] = None
matrix_file_id: Optional[str] = None
matrix_file_id: Optional[Union[str, URIorCURIE]] = None
description: Optional[str] = None
cellannotation_schema_version: Optional[str] = None
cellannotation_timestamp: Optional[str] = None
Expand All @@ -467,8 +468,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self.labelsets = [self.labelsets] if self.labelsets is not None else []
self.labelsets = [v if isinstance(v, Labelset) else Labelset(**as_dict(v)) for v in self.labelsets]

if self.matrix_file_id is not None and not isinstance(self.matrix_file_id, str):
self.matrix_file_id = str(self.matrix_file_id)
if self.matrix_file_id is not None and not isinstance(self.matrix_file_id, URIorCURIE):
self.matrix_file_id = URIorCURIE(self.matrix_file_id)

if self.description is not None and not isinstance(self.description, str):
self.description = str(self.description)
Expand Down Expand Up @@ -14325,7 +14326,7 @@ class slots:
model_uri=CELL_ANNOTATION_SCHEMA.author_annotation_fields, domain=None, range=Optional[Union[dict, Any]])

slots.matrix_file_id = Slot(uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, name="matrix_file_id", curie=CELL_ANNOTATION_SCHEMA.curie('matrix_file_id'),
model_uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, domain=None, range=Optional[str])
model_uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, domain=None, range=Optional[Union[str, URIorCURIE]])

slots.title = Slot(uri=CELL_ANNOTATION_SCHEMA.title, name="title", curie=CELL_ANNOTATION_SCHEMA.curie('title'),
model_uri=CELL_ANNOTATION_SCHEMA.title, domain=None, range=str)
Expand Down
11 changes: 6 additions & 5 deletions src/cell_annotation_schema/datamodel/cell_annotation_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Auto generated from None by pythongen.py version: 0.0.1
# Generation date: 2024-09-23T16:32:46
# Generation date: 2024-10-03T17:13:02
# Schema: General_Cell_Annotation_Open_Standard
#
# id: https://cellular-semantics.sanger.ac.uk/ontology/CAS
Expand Down Expand Up @@ -34,6 +34,7 @@
# Namespaces
CAS = CurieNamespace('CAS', 'https://cellular-semantics.sanger.ac.uk/ontology/CAS/')
CL = CurieNamespace('CL', 'http://purl.obolibrary.org/obo/CL_')
CELLXGENE_DATASET = CurieNamespace('CellXGene_dataset', 'https://cellxgene.cziscience.com/datasets/')
IAO = CurieNamespace('IAO', 'http://purl.obolibrary.org/obo/IAO_')
PCL = CurieNamespace('PCL', 'http://purl.obolibrary.org/obo/PCL_')
RO = CurieNamespace('RO', 'http://purl.obolibrary.org/obo/RO_')
Expand Down Expand Up @@ -243,7 +244,7 @@ class Taxonomy(YAMLRoot):
author_name: str = None
labelsets: Union[Union[dict, Labelset], List[Union[dict, Labelset]]] = None
annotations: Union[Union[dict, Annotation], List[Union[dict, Annotation]]] = None
matrix_file_id: Optional[str] = None
matrix_file_id: Optional[Union[str, URIorCURIE]] = None
description: Optional[str] = None
cellannotation_schema_version: Optional[str] = None
cellannotation_timestamp: Optional[str] = None
Expand Down Expand Up @@ -276,8 +277,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self.annotations = [self.annotations] if self.annotations is not None else []
self.annotations = [v if isinstance(v, Annotation) else Annotation(**as_dict(v)) for v in self.annotations]

if self.matrix_file_id is not None and not isinstance(self.matrix_file_id, str):
self.matrix_file_id = str(self.matrix_file_id)
if self.matrix_file_id is not None and not isinstance(self.matrix_file_id, URIorCURIE):
self.matrix_file_id = URIorCURIE(self.matrix_file_id)

if self.description is not None and not isinstance(self.description, str):
self.description = str(self.description)
Expand Down Expand Up @@ -14023,7 +14024,7 @@ class slots:
model_uri=CELL_ANNOTATION_SCHEMA.author_annotation_fields, domain=None, range=Optional[Union[dict, Any]])

slots.matrix_file_id = Slot(uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, name="matrix_file_id", curie=CELL_ANNOTATION_SCHEMA.curie('matrix_file_id'),
model_uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, domain=None, range=Optional[str])
model_uri=CELL_ANNOTATION_SCHEMA.matrix_file_id, domain=None, range=Optional[Union[str, URIorCURIE]])

slots.title = Slot(uri=CELL_ANNOTATION_SCHEMA.title, name="title", curie=CELL_ANNOTATION_SCHEMA.curie('title'),
model_uri=CELL_ANNOTATION_SCHEMA.title, domain=None, range=str)
Expand Down
5 changes: 2 additions & 3 deletions src/cell_annotation_schema/generator/dataclassgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,9 @@ def get_py_instance(instance_dict, schema_name, schema_def, root_class=None):

for annotation in py_inst.annotations:
# fix the author_annotation_fields in the json representation to be a string
if annotation.author_annotation_fields:
if annotation.author_annotation_fields and isinstance(annotation.author_annotation_fields, str):
deserialised = json.loads(annotation.author_annotation_fields)
# annotation.author_annotation_fields = deserialised
annotation.author_annotation_fields = {"Cluster ID": "4"}
annotation.author_annotation_fields = deserialised
return py_inst


Expand Down
19 changes: 18 additions & 1 deletion src/cell_annotation_schema/ontology/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from cell_annotation_schema.ontology.schema import DEFAULT_PREFIXES, CAS_NAMESPACE
from cell_annotation_schema.file_utils import get_json_from_file
from cell_annotation_schema.generator.dataclassgen import get_py_instance, get_root_class
from cell_annotation_schema.ontology.dumpers import rdflib_dumper
from cell_annotation_schema.curie_utils import CurieToIriConverter

from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime import SchemaView
from linkml_runtime.loaders import yaml_loader
from linkml_runtime.dumpers import rdflib_dumper
# from linkml_runtime.dumpers import rdflib_dumper
from linkml.validator import Validator

CELL_RELATION = "has_cellid"
Expand Down Expand Up @@ -54,6 +56,7 @@ def dump_to_rdf(
if validate:
validate_data(schema_def, instance)
instance = serialise_author_annotation(instance)
instance = resolve_matrix_file(instance)

root_class = get_root_class(schema_name)
py_inst = get_py_instance(instance, None, schema_def, root_class)
Expand Down Expand Up @@ -209,3 +212,17 @@ def serialise_author_annotation(instance: dict) -> dict:
if "author_annotation_fields" in annotation and annotation["author_annotation_fields"]:
annotation["author_annotation_fields"] = json.dumps(annotation["author_annotation_fields"])
return instance


def resolve_matrix_file(instance: dict) -> dict:
    """
    Expand the ``matrix_file_id`` CURIE of an instance into an IRI.

    When ``matrix_file_id`` is present and truthy, its string form is expanded
    with a default ``CurieToIriConverter`` and the suffix ``.cxg/`` is appended
    to the result. The instance is modified in place; a missing or empty value
    leaves it untouched.
    Args:
        instance: The instance to be updated.
    Returns:
        The same instance object that was passed in.
    """
    matrix_file_id = instance.get("matrix_file_id")
    if not matrix_file_id:
        # Nothing to resolve: key absent, None or empty.
        return instance

    iri = CurieToIriConverter().curie_to_iri(str(matrix_file_id))
    instance["matrix_file_id"] = iri + ".cxg/"
    return instance
13 changes: 13 additions & 0 deletions src/cell_annotation_schema/ontology/dumpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from linkml_runtime.dumpers.json_dumper import JSONDumper
from linkml_runtime.dumpers.rdf_dumper import RDFDumper
from cell_annotation_schema.ontology.dumpers.rdflib_dumper import RDFLibDumper
from linkml_runtime.dumpers.tsv_dumper import TSVDumper
from linkml_runtime.dumpers.yaml_dumper import YAMLDumper
from linkml_runtime.dumpers.csv_dumper import CSVDumper

# Module-level dumper instances, one per output format, built once at import
# time. Every class comes from linkml_runtime except RDFLibDumper, which is
# imported from this package's own rdflib_dumper module.

# Structured / tabular text formats.
json_dumper = JSONDumper()
yaml_dumper = YAMLDumper()
csv_dumper = CSVDumper()
tsv_dumper = TSVDumper()

# RDF formats.
rdf_dumper = RDFDumper()
rdflib_dumper = RDFLibDumper()
Loading

0 comments on commit fa25649

Please sign in to comment.