Skip to content

Commit

Permalink
jinja documentation added
Browse files Browse the repository at this point in the history
  • Loading branch information
hkir-dev committed Aug 30, 2024
1 parent 73d9b1b commit 53c815d
Show file tree
Hide file tree
Showing 12 changed files with 900,891 additions and 16 deletions.
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include resources/*
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
requests
cas-tools==0.0.1.dev44
jinja2
87 changes: 87 additions & 0 deletions resources/annotation_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
## {{annotation.cell_label}} ({{annotation.cell_set_accession}})
{% if 'parents' in annotation %}
<b>Hierarchy: </b>
{% for parent in annotation.parents %}
[{{parent}}]({{metadata.purl_base}}{{parent|replace(":", "_")}}) >
{% endfor %}
[{{annotation.cell_set_accession}}]({{metadata.purl_base}}{{annotation.cell_set_accession|replace(":", "_")}})
{% endif %}

---

{% set labelset = metadata.labelsets|selectattr("name", "==", annotation.labelset) | list | first %}

**Labelset:** {{annotation.labelset}} (Rank: {{labelset.rank}})

{% if 'parent_cell_set_accession' in annotation %}
{% set parent_annotation = metadata.annotations|selectattr("cell_set_accession", "==", annotation.parent_cell_set_accession) | list | first %}
**Parent Cell Set:** {{parent_annotation.cell_label}} ([{{annotation.parent_cell_set_accession}}]({{metadata.purl_base}}{{annotation.parent_cell_set_accession|replace(":", "_")}}))
{% else %}
**Parent Cell Set:** -
{% endif %}

{% if 'cell_fullname' in annotation %}
{{annotation.cell_fullname}}
{% endif %}

{% if 'synonyms' in annotation %}
| Synonyms |
|----------|
{% for synonym in annotation.synonyms %}
|{{synonym}}|
{% endfor %}
{% endif %}

**Cell Ontology Term:** {% if 'cell_ontology_term' in annotation %} {{annotation.cell_ontology_term}} ([{{annotation.cell_ontology_term_id}}](https://www.ebi.ac.uk/ols/ontologies/cl/terms?obo_id={{annotation.cell_ontology_term_id}})) {% endif %}

{% if 'rationale' in annotation %}

**Rationale:** {{annotation.rationale}}
{% endif %}
{% if 'rationale_dois' in annotation %}

| Rationale DOIs |
|----------------|
{% for doi in annotation.rationale_dois %}
|{{doi}}|
{% endfor %}
{% endif %}

[MARKER GENES.]: #

{% if 'marker_gene_evidence' in annotation %}

| Marker Genes |
|--------------|
{% for gene in annotation.marker_gene_evidence %}
|{{gene}}|
{% endfor %}
{% endif %}

---

[TRANSFERRED ANNOTATIONS.]: #

{% if 'transferred_annotations' in annotation %}

**Transferred annotations:**

| Transferred cell label | Source taxonomy | Source node accession | Algorithm name | Comment |
|------------------------|-----------------|-----------------------|----------------|---------|
{% for at in annotation.transferred_annotations %}
|{{at.transferred_cell_label}}|{{at.source_taxonomy}}|[{{at.source_node_accession}}]({{at.purl_base}}{{at.source_node_accession|replace(":", "_")}})|{{at.algorithm_name}}|{{at.comment}}|
{% endfor %}
{% endif %}

[AUTHOR ANNOTATION FIELDS.]: #

{% if 'author_annotation_fields' in annotation %}

**Author annotation fields:**

| Author annotation | Value |
|-------------------|-------|
{% for key, value in annotation.author_annotation_fields.items() %}
|{{key}}|{{value}}|
{% endfor %}
{% endif %}
37 changes: 37 additions & 0 deletions resources/taxonomy_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
## {{cas.title}}

{{cas.description}}

---

**Matrix File ID:** {{cas.matrix_file_id}}

**Cell Annotation URL:** {{cas.cellannotation_url}}

**Author name:** {{cas.author_name}}

**Author contact:** {{cas.author_contact}}

**Author ORCID:** {{cas.orcid}}

{% if 'author_list' in cas %}
**Author list:** {{cas.author_list}}
{% endif %}

---

**Cell Annotation Schema Version:** {{cas.cellannotation_schema_version}}

**Cell Annotation Timestamp:** {{cas.cellannotation_timestamp}}

**Cell Annotation Version:** {{cas.cellannotation_version}}

---

**Labelsets:**

| Name | Description | Annotation Method | Rank |
|------|-------------|-------------------|------|
{% for labelset in cas.labelsets %}
|{{labelset.name}}|{{labelset.description}}|{{labelset.annotation_method}}|{{labelset.rank}}|
{% endfor %}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name="tdta",
version="0.1.0.dev17",
version="0.1.0.dev18",
description="The aim of this project is to provide taxonomy development tools custom actions.",
long_description=README,
long_description_content_type="text/markdown",
Expand Down
14 changes: 14 additions & 0 deletions src/tdta/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from tdta.tdt_export import export_cas_data
from tdta.anndata_export import export_anndata
from tdta.version_control import git_update_local
from tdta.documentation import generate_documentation


def main():
Expand All @@ -14,6 +15,7 @@ def main():
create_save_operation_parser(subparsers)
create_anndata_operation_parser(subparsers)
create_merge_operation_parser(subparsers)
create_docs_operation_parser(subparsers)

args = parser.parse_args()

Expand All @@ -31,6 +33,8 @@ def main():
export_anndata(args.database, args.json, args.output, cache_folder_path)
elif args.action == "merge":
git_update_local(str(args.project), str(args.message))
elif args.action == "docs":
generate_documentation(args.database, args.output)


def create_purl_operation_parser(subparsers):
Expand Down Expand Up @@ -76,5 +80,15 @@ def create_merge_operation_parser(subparsers):
parser_purl.add_argument('-m', '--message', required=True, help="Commit message.")


def create_docs_operation_parser(subparsers):
    """
    Register the ``docs`` sub-command, which generates the taxonomy documentation.

    The sub-command must be named ``docs``: ``main()`` dispatches to
    ``generate_documentation`` only when ``args.action == "docs"``, so registering it
    under any other name would leave that branch unreachable.

    Parameters:
        subparsers: the sub-parsers action object (as returned by
            ``ArgumentParser.add_subparsers``) to which the command is added.
    """
    parser_docs = subparsers.add_parser("docs", add_help=False,
                                        description="The documentation generation parser",
                                        help="Generates the taxonomy github pages docs.")
    parser_docs.add_argument('-db', '--database', action='store', type=pathlib.Path, required=True,
                             help="Database file path.")
    parser_docs.add_argument('-o', '--output', action='store', type=pathlib.Path, required=True,
                             help="Output file path.")


if __name__ == "__main__":
main()
110 changes: 110 additions & 0 deletions src/tdta/documentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import os
from pathlib import Path

from jinja2 import Template
from urllib.parse import urlparse

from tdta.tdt_export import db_to_cas
from tdta.utils import read_project_config

# Both templates are located relative to this module's own (symlink-resolved) directory,
# two levels up and then inside the "resources" folder.
_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
ANNOTATIONS_TEMPLATE = os.path.join(_MODULE_DIR, "../../resources/annotation_template.md")
TAXONOMY_TEMPLATE = os.path.join(_MODULE_DIR, "../../resources/taxonomy_template.md")


def generate_documentation(sqlite_db: str, output_folder: str, project_config=None):
    """
    Generate markdown documentation for a CAS database.

    One page is written per annotation, named after its ``cell_set_accession`` with
    ``:`` replaced by ``_`` and a ``.md`` suffix, plus a ``taxonomy.md`` summary page.

    Parameters:
        sqlite_db: Path to the CAS database.
        output_folder: Path to the output documentation folder. Created when missing.
        project_config: Project configuration. When None, it is read from the parent
            folder of ``output_folder``.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_folder, exist_ok=True)

    cas_obj = db_to_cas(sqlite_db)
    cas = cas_obj.to_dict()
    if project_config is None:
        project_config = read_project_config(Path(output_folder).parent.absolute())
    cas = transform_cas(cas, project_config)

    annotation_template = read_jinja_template(ANNOTATIONS_TEMPLATE)
    for annotation in cas["annotations"]:
        rendered_file = annotation_template.render(annotation=annotation, metadata=cas)
        annotation_file_name = annotation["cell_set_accession"].replace(":", "_")

        # Explicit UTF-8 so pages with non-ASCII text do not depend on the platform's
        # default locale encoding.
        with open(os.path.join(output_folder, annotation_file_name + ".md"), "w", encoding="utf-8") as fh:
            fh.write(rendered_file)

    taxonomy_template = read_jinja_template(TAXONOMY_TEMPLATE)
    rendered_file = taxonomy_template.render(cas=cas)
    with open(os.path.join(output_folder, "taxonomy.md"), "w", encoding="utf-8") as fh:
        fh.write(rendered_file)


def transform_cas(cas, project_config):
    """
    Enrich the CAS dict with presentation-only data used by the templates.

    Runs, in order: PURL information (from the project's ``id``), the parent
    breadcrumb of every annotation, and the PURL bases of transferred annotations.
    The same ``cas`` object that was passed in is returned.
    """
    project_id = project_config["id"]
    add_purl(cas, project_id)
    add_parents(cas)
    transform_annotation_transfer(cas)
    return cas


def transform_annotation_transfer(cas):
    """
    Attach a ``purl_base`` to every transferred annotation of the CAS dict, in place.

    The base is built from the entry's ``source_taxonomy`` URL: scheme and host are
    kept, and the second-to-last path segment is used as the taxonomy id.
    """
    for cell_set in cas["annotations"]:
        if "transferred_annotations" not in cell_set:
            continue
        for transfer in cell_set["transferred_annotations"]:
            source_url = urlparse(transfer["source_taxonomy"])
            # For a path shaped like "/taxonomy/<id>/<file>" this picks "<id>".
            taxonomy_id = source_url.path.split('/')[-2]
            transfer["purl_base"] = f"{source_url.scheme}://{source_url.netloc}/taxonomy/{taxonomy_id}#"


def add_purl(cas, project_id):
    """
    Store the project's PURL base on the CAS dict (always overwritten) and fill in
    ``cellannotation_url`` only when the CAS dict does not already define it.
    """
    cas["purl_base"] = f"https://purl.brain-bican.org/taxonomy/{project_id}#"
    cas.setdefault("cellannotation_url", f"https://purl.brain-bican.org/taxonomy/{project_id}/{project_id}.json")


def add_parents(cas):
    """
    Set a ``parents`` list on every annotation of the CAS dict, in place, using the
    ancestor lists computed by ``build_hierarchy``.
    """
    annotations = cas["annotations"]
    ancestors_by_accession = build_hierarchy(annotations)
    for entry in annotations:
        entry["parents"] = ancestors_by_accession[entry["cell_set_accession"]]


def build_hierarchy(annotations):
    """
    Build a hierarchy of cell sets. Keys of the dicts are cell set accessions, values are lists of parent cell set
    accessions ordered from highest to lowest.

    The ancestor chain of an annotation ends at the first annotation that has no
    ``parent_cell_set_accession``, whose parent is not among ``annotations``, or whose
    parent was already visited. The last condition makes cyclic or self-referencing
    parent links terminate instead of recursing without bound.

    Parameters:
        annotations: list of annotation dicts, each with a ``cell_set_accession`` and an
            optional ``parent_cell_set_accession``.
    Returns:
        dict mapping each cell set accession to the list of its ancestor accessions.
    """
    annotation_dict = {annotation['cell_set_accession']: annotation for annotation in annotations}

    def get_hierarchy(annotation):
        # Walk upwards iteratively; ancestors are collected nearest-first and reversed at the end.
        lineage = []
        visited = {annotation['cell_set_accession']}
        current = annotation
        while 'parent_cell_set_accession' in current:
            parent_accession = current['parent_cell_set_accession']
            parent_annotation = annotation_dict.get(parent_accession)
            if not parent_annotation or parent_accession in visited:
                break
            visited.add(parent_accession)
            lineage.append(parent_accession)
            current = parent_annotation
        lineage.reverse()
        return lineage

    return {annotation['cell_set_accession']: get_hierarchy(annotation) for annotation in annotations}


def read_jinja_template(template_path):
    """
    Read Jinja template from file.

    The file is decoded as UTF-8 rather than with the platform's default locale
    encoding, so the template text is read the same way on every system. The template
    is created with ``trim_blocks=True``.

    Parameters:
        template_path: path of the template file.
    Returns:
        The ``jinja2.Template`` built from the file content.
    """
    with open(template_path, 'r', encoding='utf-8') as file:
        template = Template(file.read(), trim_blocks=True)
    return template

34 changes: 19 additions & 15 deletions src/tdta/tdt_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,7 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
:param output_file: output json path
:param dataset_cache_folder: anndata cache folder path
"""
cta = CellTypeAnnotation("", list(), "")

cas_tables = get_table_names(sqlite_db)
for table_name in cas_tables:
if table_name == "metadata":
parse_metadata_data(cta, sqlite_db, table_name)
elif table_name == "annotation":
parse_annotation_data(cta, sqlite_db, table_name)
elif table_name == "labelset":
parse_labelset_data(cta, sqlite_db, table_name)
elif table_name == "annotation_transfer":
parse_annotation_transfer_data(cta, sqlite_db, table_name)
# elif table_name == "review":
# # don't export reviews to the CAS json for now
# parse_review_data(cta, sqlite_db, table_name)
cta = db_to_cas(sqlite_db)

project_config = read_project_config(Path(output_file).parent.absolute())

Expand All @@ -72,6 +58,24 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
return cta


def db_to_cas(sqlite_db):
    """
    Build a CellTypeAnnotation object from the tables of a CAS SQLite database.

    Each known table name is routed to its parser, which receives the object under
    construction, the database path and the table name. Tables without a parser are
    ignored; the "review" table is deliberately among them, since reviews are not
    exported to the CAS json for now.
    """
    table_parsers = {
        "metadata": parse_metadata_data,
        "annotation": parse_annotation_data,
        "labelset": parse_labelset_data,
        "annotation_transfer": parse_annotation_transfer_data,
    }

    cas_object = CellTypeAnnotation("", list(), "")
    for name in get_table_names(sqlite_db):
        parse_table = table_parsers.get(name)
        if parse_table is not None:
            parse_table(cas_object, sqlite_db, name)
    return cas_object


def ensure_file_size_limit(file_path):
"""
Checks if the file size exceeds the GitHub size limit and zips the file if needed.
Expand Down
46 changes: 46 additions & 0 deletions src/test/generate_docs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import unittest
import os
import shutil
import json

from tdta.documentation import generate_documentation, build_hierarchy

# Test fixtures live in "test_data/" next to this test module; generated docs go to a
# "docs/" folder inside it.
_TEST_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
TEST_DATA_FOLDER = os.path.join(_TEST_MODULE_DIR, "test_data/")
TEST_DB = os.path.join(TEST_DATA_FOLDER, "nanobot_siletti_nn_with_at.db")
TEST_OUTPUT = os.path.join(TEST_DATA_FOLDER, "docs/")


class GenerateDocsTestCase(unittest.TestCase):
    """Tests for the markdown documentation generator and its hierarchy helper."""

    def setUp(self):
        # Start every test from a clean output folder so stale pages cannot mask failures.
        if os.path.exists(TEST_OUTPUT):
            shutil.rmtree(TEST_OUTPUT)

    def test_documentation_generation(self):
        generate_documentation(TEST_DB, TEST_OUTPUT, project_config={"id": "CS202210140"})
        self.assertTrue(os.path.exists(TEST_OUTPUT))

        # The generator always writes a taxonomy summary page into the output folder.
        taxonomy_page = os.path.join(TEST_OUTPUT, "taxonomy.md")
        self.assertTrue(os.path.isfile(taxonomy_page))
        self.assertGreater(os.path.getsize(taxonomy_page), 0)

    def test_hierarchy_breadcrumb(self):
        # Resolve the fixture relative to this module, not the current working directory.
        with open(os.path.join(TEST_DATA_FOLDER, "CS202210140.json")) as f:
            siletti = json.load(f)

        hierarchy = build_hierarchy(siletti["annotations"])
        self.assertEqual(386, len(list(hierarchy.keys())))

        subcluster_parents = hierarchy["CS202210140_3490"]
        self.assertEqual(2, len(subcluster_parents))
        self.assertEqual("CS202210140_469", subcluster_parents[0])
        self.assertEqual("CS202210140_51", subcluster_parents[1])

        cluster_parents = hierarchy["CS202210140_6"]
        self.assertEqual(1, len(cluster_parents))
        self.assertEqual("CS202210140_464", cluster_parents[0])

        supercluster_parents = hierarchy["CS202210140_465"]
        self.assertEqual(0, len(supercluster_parents))


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit 53c815d

Please sign in to comment.