Skip to content

Commit

Permalink
Dataset IRIs are based on CxG dataset UUID (#82)
Browse files Browse the repository at this point in the history
* Refactor dataset_class URI logic

* Add get_cxg_dataset_url method

* Update assert statements

* Update version to 0.2.23
  • Loading branch information
ubyndr authored Aug 15, 2024
1 parent df3e807 commit 116dfc0
Show file tree
Hide file tree
Showing 5 changed files with 639 additions and 585 deletions.
11 changes: 8 additions & 3 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
colour_mapping,
find_and_rotate_center_layout,
generate_subgraph,
get_cxg_dataset_url,
parse_citation_field_into_dict,
remove_special_characters,
select_node_with_property,
Expand Down Expand Up @@ -124,15 +125,19 @@ def generate_rdf_graph(self):
grouped_dict_uuid[str(uuid.uuid4())] = temp_dict

# generate dataset entity and has_source property
dataset_class = URIRef(self.ns[str(uuid.uuid4())])
uns = self.ea.enricher_manager.anndata.uns
citation_dict = {}
if citation_field_name in uns.keys():
citation_dict = parse_citation_field_into_dict(uns[citation_field_name])
dataset_class = URIRef(get_cxg_dataset_url(citation_dict.get("download_link").split("/")[-1].split(".")[0]))
else:
dataset_class = URIRef(self.ns[str(uuid.uuid4())])
self.graph.add((dataset_class, RDF.type, URIRef(DATASET.get("iri"))))
self.graph.add((dataset_class, RDFS.label, Literal(DATASET.get("label"))))
uns = self.ea.enricher_manager.anndata.uns
for key, value in uns.items():
if not isinstance(value, str):
continue
if key == citation_field_name:
citation_dict = parse_citation_field_into_dict(value)
for citation_key, citation_value in citation_dict.items():
self.graph.add(
(
Expand Down
16 changes: 16 additions & 0 deletions pandasaurus_cxg/graph_generator/graph_generator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,19 @@ def parse_citation_field_into_dict(value: str) -> Dict[str, str]:
}
)
return key_value_pairs


def get_cxg_dataset_url(matrix_id: str) -> str:
    """
    Build the CellxGene URL that corresponds to a dataset matrix.

    The identifier is inserted verbatim into the URL path; no validation or
    escaping is applied.

    Args:
        matrix_id: Identifier of the dataset matrix.

    Returns:
        A string of the form
        ``https://cellxgene.cziscience.com/e/<matrix_id>.cxg/``.
    """
    explorer_root = "https://cellxgene.cziscience.com/e"
    return f"{explorer_root}/{matrix_id}.cxg/"
Loading

0 comments on commit 116dfc0

Please sign in to comment.