jinja documentation added

brain-bican · Aug 30, 2024 · 53c815d · 53c815d
1 parent 73d9b1b
commit 53c815d
Show file tree

Hide file tree

Showing 12 changed files with 900,891 additions and 16 deletions.
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+include resources/*
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 requests
 cas-tools==0.0.1.dev44
+jinja2
diff --git a/resources/annotation_template.md b/resources/annotation_template.md
@@ -0,0 +1,87 @@
+## {{annotation.cell_label}} ({{annotation.cell_set_accession}})
+{% if 'parents' in annotation %}
+<b>Hierarchy: </b>
+{% for parent in annotation.parents %}
+[{{parent}}]({{metadata.purl_base}}{{parent|replace(":", "_")}}) >
+{% endfor %}
+[{{annotation.cell_set_accession}}]({{metadata.purl_base}}{{annotation.cell_set_accession|replace(":", "_")}})
+{% endif %}
+
+---
+
+{% set labelset = metadata.labelsets|selectattr("name", "==", annotation.labelset) | list | first  %}
+
+**Labelset:** {{annotation.labelset}} (Rank: {{labelset.rank}})
+
+{% if 'parent_cell_set_accession' in annotation %}
+{% set parent_annotation = metadata.annotations|selectattr("cell_set_accession", "==", annotation.parent_cell_set_accession) | list | first  %}
+**Parent Cell Set:** {{parent_annotation.cell_label}} ([{{annotation.parent_cell_set_accession}}]({{metadata.purl_base}}{{annotation.parent_cell_set_accession|replace(":", "_")}}))
+{% else %}
+**Parent Cell Set:** -
+{% endif %}
+
+{% if 'cell_fullname' in annotation %}
+{{annotation.cell_fullname}}
+{% endif %}
+
+{% if 'synonyms' in annotation %}
+| Synonyms |
+|----------|
+{% for synonym in annotation.synonyms %}
+|{{synonym}}|
+{% endfor %}
+{% endif %}
+
+**Cell Ontology Term:** {% if 'cell_ontology_term' in annotation %} {{annotation.cell_ontology_term}} ([{{annotation.cell_ontology_term_id}}](https://www.ebi.ac.uk/ols/ontologies/cl/terms?obo_id={{annotation.cell_ontology_term_id}})) {% endif %}
+
+{% if 'rationale' in annotation %}
+
+**Rationale:** {{annotation.rationale}}
+{% endif %}
+{% if 'rationale_dois' in annotation %}
+
+| Rationale DOIs |
+|----------------|
+{% for doi in annotation.rationale_dois %}
+|{{doi}}|
+{% endfor %}
+{% endif %}
+
+[MARKER GENES.]: #
+
+{% if 'marker_gene_evidence' in annotation %}
+
+| Marker Genes |
+|--------------|
+{% for gene in annotation.marker_gene_evidence %}
+|{{gene}}|
+{% endfor %}
+{% endif %}
+
+---
+
+[TRANSFERRED ANNOTATIONS.]: #
+
+{% if 'transferred_annotations' in annotation %}
+
+**Transferred annotations:**
+
+| Transferred cell label | Source taxonomy | Source node accession | Algorithm name | Comment |
+|------------------------|-----------------|-----------------------|----------------|---------|
+{% for at in annotation.transferred_annotations %}
+|{{at.transferred_cell_label}}|{{at.source_taxonomy}}|[{{at.source_node_accession}}]({{at.purl_base}}{{at.source_node_accession|replace(":", "_")}})|{{at.algorithm_name}}|{{at.comment}}|
+{% endfor %}
+{% endif %}
+
+[AUTHOR ANNOTATION FIELDS.]: #
+
+{% if 'author_annotation_fields' in annotation %}
+
+**Author annotation fields:**
+
+| Author annotation | Value |
+|-------------------|-------|
+{% for key, value in annotation.author_annotation_fields.items() %}
+|{{key}}|{{value}}|
+{% endfor %}
+{% endif %}
diff --git a/resources/taxonomy_template.md b/resources/taxonomy_template.md
@@ -0,0 +1,37 @@
+## {{cas.title}}
+
+{{cas.description}}
+
+---
+
+**Matrix File ID:** {{cas.matrix_file_id}}
+
+**Cell Annotation URL:** {{cas.cellannotation_url}}
+
+**Author name:** {{cas.author_name}}
+
+**Author contact:** {{cas.author_contact}}
+
+**Author orcid:** {{cas.orcid}}
+
+{% if 'author_list' in cas %}
+**Author list:** {{cas.author_list}}
+{% endif %}
+
+---
+
+**Cell Annotation Schema Version:** {{cas.cellannotation_schema_version}}
+
+**Cell Annotation Timestamp:** {{cas.cellannotation_timestamp}}
+
+**Cell Annotation Version:** {{cas.cellannotation_version}}
+
+---
+
+**Labelsets:**
+
+| Name | Description | Annotation Method | Rank |
+|------|-------------|-------------------|------|
+{% for labelset in cas.labelsets %}
+|{{labelset.name}}|{{labelset.description}}|{{labelset.annotation_method}}|{{labelset.rank}}|
+{% endfor %}
diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
 
 setup(
     name="tdta",
-    version="0.1.0.dev17",
+    version="0.1.0.dev18",
     description="The aim of this project is to provide taxonomy development tools custom actions.",
     long_description=README,
     long_description_content_type="text/markdown",

diff --git a/src/tdta/__main__.py b/src/tdta/__main__.py
@@ -4,6 +4,7 @@
 from tdta.tdt_export import export_cas_data
 from tdta.anndata_export import export_anndata
 from tdta.version_control import git_update_local
+from tdta.documentation import generate_documentation
 
 
 def main():
@@ -14,6 +15,7 @@ def main():
     create_save_operation_parser(subparsers)
     create_anndata_operation_parser(subparsers)
     create_merge_operation_parser(subparsers)
+    create_docs_operation_parser(subparsers)
 
     args = parser.parse_args()
 
@@ -31,6 +33,8 @@ def main():
         export_anndata(args.database, args.json, args.output, cache_folder_path)
     elif args.action == "merge":
         git_update_local(str(args.project), str(args.message))
+    elif args.action == "docs":
+        generate_documentation(args.database, args.output)
 
 
 def create_purl_operation_parser(subparsers):
@@ -76,5 +80,15 @@ def create_merge_operation_parser(subparsers):
     parser_purl.add_argument('-m', '--message', required=True, help="Commit message.")
 
 
+def create_docs_operation_parser(subparsers):
+    """
+    Register the "docs" sub-command that generates the taxonomy documentation.
+    Parameters:
+        subparsers: Sub-parser collection the "docs" parser is added to.
+    """
+    # The sub-command has to be named "docs": main() calls generate_documentation only when
+    # args.action == "docs", and reads args.database / args.output from this parser.
+    parser_docs = subparsers.add_parser("docs", add_help=False,
+                                        description="The documentation generation parser",
+                                        help="Generates the taxonomy github pages docs.")
+    parser_docs.add_argument('-db', '--database', action='store', type=pathlib.Path, required=True,
+                             help="Database file path.")
+    # The value is handed to generate_documentation as its output folder.
+    parser_docs.add_argument('-o', '--output', action='store', type=pathlib.Path, required=True,
+                             help="Output folder path.")
+
+
 if __name__ == "__main__":
     main()
diff --git a/src/tdta/documentation.py b/src/tdta/documentation.py
@@ -0,0 +1,110 @@
+import os
+from pathlib import Path
+
+from jinja2 import Template
+from urllib.parse import urlparse
+
+from tdta.tdt_export import db_to_cas
+from tdta.utils import read_project_config
+
+ANNOTATIONS_TEMPLATE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../resources/annotation_template.md")
+TAXONOMY_TEMPLATE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../resources/taxonomy_template.md")
+
+
+def generate_documentation(sqlite_db: str, output_folder: str, project_config=None):
+    """
+    Generate markdown documentation for a CAS database.
+
+    Writes one markdown file per annotation (named after its cell set accession, with ":" replaced by "_")
+    and a "taxonomy.md" summary file into the output folder.
+    Parameters:
+        sqlite_db: Path to the CAS database.
+        output_folder: Path to the output documentation folder. Created when it does not exist.
+        project_config: Project configuration. When None, it is read from the parent folder of output_folder.
+    """
+    # exist_ok replaces the separate existence check, so an already present folder is not an error.
+    os.makedirs(output_folder, exist_ok=True)
+
+    cas_obj = db_to_cas(sqlite_db)
+    cas = cas_obj.to_dict()
+    if project_config is None:
+        project_config = read_project_config(Path(output_folder).parent.absolute())
+    cas = transform_cas(cas, project_config)
+
+    annotation_template = read_jinja_template(ANNOTATIONS_TEMPLATE)
+    for annotation in cas["annotations"]:
+        rendered_file = annotation_template.render(annotation=annotation, metadata=cas)
+        annotation_file_name = annotation["cell_set_accession"].replace(":", "_")
+
+        # Explicit UTF-8 so that non-ASCII text in the rendered page does not depend on the platform default.
+        with open(os.path.join(output_folder, annotation_file_name + ".md"), "w", encoding="utf-8") as fh:
+            fh.write(rendered_file)
+
+    taxonomy_template = read_jinja_template(TAXONOMY_TEMPLATE)
+    rendered_file = taxonomy_template.render(cas=cas)
+    with open(os.path.join(output_folder, "taxonomy.md"), "w", encoding="utf-8") as fh:
+        fh.write(rendered_file)
+
+
+def transform_cas(cas, project_config):
+    """
+    Enriches the cas dictionary in place with the extra fields used by the documentation templates:
+    purl information, parent lists and purl bases of transferred annotations.
+    Parameters:
+        cas: CAS dictionary.
+        project_config: Project configuration, its "id" value is used to build the purls.
+    Returns:
+        The same cas dictionary.
+    """
+    project_id = project_config["id"]
+    add_purl(cas, project_id)
+    add_parents(cas)
+    transform_annotation_transfer(cas)
+    return cas
+
+
+def transform_annotation_transfer(cas):
+    """
+    Adds a "purl_base" value to each transferred annotation, derived from its "source_taxonomy" url.
+
+    The taxonomy id is the second to last segment of the url path. Transferred annotations whose
+    "source_taxonomy" is missing, empty or has no such segment are left unchanged instead of aborting
+    the whole documentation run.
+    Parameters:
+        cas: CAS dictionary, modified in place.
+    """
+    for annotation in cas["annotations"]:
+        for transferred_annotation in annotation.get("transferred_annotations") or []:
+            source_taxonomy = transferred_annotation.get("source_taxonomy")
+            if not source_taxonomy:
+                continue
+            parsed_url = urlparse(source_taxonomy)
+            path_parts = parsed_url.path.split('/')
+            if len(path_parts) < 2:
+                # A path without any "/" has no second to last segment to take the taxonomy id from.
+                continue
+            taxonomy_id = path_parts[-2]
+            purl_base = f"{parsed_url.scheme}://{parsed_url.netloc}/taxonomy/{taxonomy_id}#"
+            transferred_annotation["purl_base"] = purl_base
+
+
+def add_purl(cas, project_id):
+    """
+    Sets the "purl_base" of the cas and, only when it is not already present, its "cellannotation_url".
+    Parameters:
+        cas: CAS dictionary, modified in place.
+        project_id: Project id used inside the purls.
+    """
+    cas["purl_base"] = f"https://purl.brain-bican.org/taxonomy/{project_id}#"
+    # setdefault keeps an existing cellannotation_url untouched.
+    cas.setdefault("cellannotation_url", f"https://purl.brain-bican.org/taxonomy/{project_id}/{project_id}.json")
+
+
+def add_parents(cas):
+    """
+    Stores, on every annotation of the cas, the list of its parent cell set accessions under the "parents" key.
+    The lists are the ones computed by build_hierarchy.
+    Parameters:
+        cas: CAS dictionary, its annotations are modified in place.
+    """
+    annotations = cas["annotations"]
+    parents_by_accession = build_hierarchy(annotations)
+    for entry in annotations:
+        accession = entry["cell_set_accession"]
+        entry["parents"] = parents_by_accession[accession]
+
+
+def build_hierarchy(annotations):
+    """
+    Compute the ancestors of every cell set.
+
+    Returns a dict whose keys are cell set accessions and whose values are lists of parent cell set accessions
+    ordered from highest to lowest. A parent accession that does not belong to any of the given annotations
+    ends the chain and is not included in the list.
+    """
+    by_accession = {entry['cell_set_accession']: entry for entry in annotations}
+
+    def ancestors_of(node):
+        # Walk up recursively; the ancestors of the parent come before the parent itself.
+        if 'parent_cell_set_accession' in node:
+            parent_id = node['parent_cell_set_accession']
+            parent_node = by_accession.get(parent_id)
+            if parent_node:
+                return ancestors_of(parent_node) + [parent_id]
+        return []
+
+    return {entry['cell_set_accession']: ancestors_of(entry) for entry in annotations}
+
+
+def read_jinja_template(template_path):
+    """
+    Read Jinja template from file.
+    Parameters:
+        template_path: Path to the template file.
+    Returns:
+        Template object created from the file content with trim_blocks enabled.
+    """
+    # Explicit UTF-8 so that reading the template does not depend on the platform default encoding.
+    with open(template_path, 'r', encoding='utf-8') as file:
+        template = Template(file.read(), trim_blocks=True)
+    return template
+
diff --git a/src/tdta/tdt_export.py b/src/tdta/tdt_export.py
@@ -32,21 +32,7 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
     :param output_file: output json path
     :param dataset_cache_folder: anndata cache folder path
     """
-    cta = CellTypeAnnotation("", list(), "")
-
-    cas_tables = get_table_names(sqlite_db)
-    for table_name in cas_tables:
-        if table_name == "metadata":
-            parse_metadata_data(cta, sqlite_db, table_name)
-        elif table_name == "annotation":
-            parse_annotation_data(cta, sqlite_db, table_name)
-        elif table_name == "labelset":
-            parse_labelset_data(cta, sqlite_db, table_name)
-        elif table_name == "annotation_transfer":
-            parse_annotation_transfer_data(cta, sqlite_db, table_name)
-        # elif table_name == "review":
-        #     # don't export reviews to the CAS json for now
-        #     parse_review_data(cta, sqlite_db, table_name)
+    cta = db_to_cas(sqlite_db)
 
     project_config = read_project_config(Path(output_file).parent.absolute())
 
@@ -72,6 +58,24 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
     return cta
 
 
+def db_to_cas(sqlite_db):
+    """
+    Creates a CellTypeAnnotation object and passes it to the parser of each known table of the database.
+
+    :param sqlite_db: database handed to get_table_names and to the table parsers
+    :return: the CellTypeAnnotation object
+    """
+    cta = CellTypeAnnotation("", list(), "")
+    # The "review" table has no entry on purpose: reviews are not exported to the CAS json for now.
+    table_parsers = {
+        "metadata": parse_metadata_data,
+        "annotation": parse_annotation_data,
+        "labelset": parse_labelset_data,
+        "annotation_transfer": parse_annotation_transfer_data,
+    }
+    for table_name in get_table_names(sqlite_db):
+        parse_table = table_parsers.get(table_name)
+        if parse_table is not None:
+            parse_table(cta, sqlite_db, table_name)
+    return cta
+
+
 def ensure_file_size_limit(file_path):
     """
     Checks if the file size exceeds the GitHub size limit and zips the file if needed.

diff --git a/src/test/generate_docs_test.py b/src/test/generate_docs_test.py
@@ -0,0 +1,46 @@
+import unittest
+import os
+import shutil
+import json
+
+from tdta.documentation import generate_documentation, build_hierarchy
+
+TEST_DATA_FOLDER = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_data/")
+TEST_DB = os.path.join(TEST_DATA_FOLDER, "nanobot_siletti_nn_with_at.db")
+TEST_OUTPUT = os.path.join(TEST_DATA_FOLDER, "docs/")
+
+
+class GenerateDocsTestCase(unittest.TestCase):
+    """Tests of the markdown documentation generation and of the parent hierarchy computation."""
+
+    def setUp(self):
+        # Start every test without output left over from a previous run.
+        if os.path.exists(TEST_OUTPUT):
+            shutil.rmtree(TEST_OUTPUT)
+
+    def test_documentation_generation(self):
+        generate_documentation(TEST_DB, TEST_OUTPUT, project_config={"id": "CS202210140"})
+        self.assertTrue(os.path.exists(TEST_OUTPUT))
+
+        # generate_documentation always writes the taxonomy summary page into the output folder.
+        self.assertTrue(os.path.isfile(os.path.join(TEST_OUTPUT, "taxonomy.md")))
+
+    def test_hierarchy_breadcrumb(self):
+        # Resolve the fixture through TEST_DATA_FOLDER so the test does not depend on the working directory.
+        with open(os.path.join(TEST_DATA_FOLDER, "CS202210140.json")) as f:
+            siletti = json.load(f)
+
+        hierarchy = build_hierarchy(siletti["annotations"])
+        self.assertEqual(386, len(list(hierarchy.keys())))
+
+        subcluster_parents = hierarchy["CS202210140_3490"]
+        self.assertEqual(2, len(subcluster_parents))
+        self.assertEqual("CS202210140_469", subcluster_parents[0])
+        self.assertEqual("CS202210140_51", subcluster_parents[1])
+
+        cluster_parents = hierarchy["CS202210140_6"]
+        self.assertEqual(1, len(cluster_parents))
+        self.assertEqual("CS202210140_464", cluster_parents[0])
+
+        supercluster_parents = hierarchy["CS202210140_465"]
+        self.assertEqual(0, len(supercluster_parents))
+
+
+if __name__ == '__main__':
+    unittest.main()