Skip to content

Commit 07f758e

Browse files
authored
Merge pull request #48 from brain-bican/docs
jinja documentation support added
2 parents 73d9b1b + 53c815d commit 07f758e

12 files changed

+900891
-16
lines changed

.idea/.gitignore

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include resources/*

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
requests
22
cas-tools==0.0.1.dev44
3+
jinja2

resources/annotation_template.md

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
## {{annotation.cell_label}} ({{annotation.cell_set_accession}})
2+
{% if 'parents' in annotation %}
3+
<b>Hierarchy: </b>
4+
{% for parent in annotation.parents %}
5+
[{{parent}}]({{metadata.purl_base}}{{parent|replace(":", "_")}}) >
6+
{% endfor %}
7+
[{{annotation.cell_set_accession}}]({{metadata.purl_base}}{{annotation.cell_set_accession|replace(":", "_")}})
8+
{% endif %}
9+
10+
---
11+
12+
{% set labelset = metadata.labelsets|selectattr("name", "==", annotation.labelset) | list | first %}
13+
14+
**Labelset:** {{annotation.labelset}} (Rank: {{labelset.rank}})
15+
16+
{% if 'parent_cell_set_accession' in annotation %}
17+
{% set parent_annotation = metadata.annotations|selectattr("cell_set_accession", "==", annotation.parent_cell_set_accession) | list | first %}
18+
**Parent Cell Set:** {{parent_annotation.cell_label}} ([{{annotation.parent_cell_set_accession}}]({{metadata.purl_base}}{{annotation.parent_cell_set_accession|replace(":", "_")}}))
19+
{% else %}
20+
**Parent Cell Set:** -
21+
{% endif %}
22+
23+
{% if 'cell_fullname' in annotation %}
24+
{{annotation.cell_fullname}}
25+
{% endif %}
26+
27+
{% if 'synonyms' in annotation %}
28+
| Synonyms |
29+
|----------|
30+
{% for synonym in annotation.synonyms %}
31+
|{{synonym}}|
32+
{% endfor %}
33+
{% endif %}
34+
35+
**Cell Ontology Term:** {% if 'cell_ontology_term' in annotation %} {{annotation.cell_ontology_term}} ([{{annotation.cell_ontology_term_id}}](https://www.ebi.ac.uk/ols/ontologies/cl/terms?obo_id={{annotation.cell_ontology_term_id}})) {% endif %}
36+
37+
{% if 'rationale' in annotation %}
38+
39+
**Rationale:** {{annotation.rationale}}
40+
{% endif %}
41+
{% if 'rationale_dois' in annotation %}
42+
43+
| Rationale DOIs |
44+
|----------------|
45+
{% for doi in annotation.rationale_dois %}
46+
|{{doi}}|
47+
{% endfor %}
48+
{% endif %}
49+
50+
[MARKER GENES.]: #
51+
52+
{% if 'marker_gene_evidence' in annotation %}
53+
54+
| Marker Genes |
55+
|--------------|
56+
{% for gene in annotation.marker_gene_evidence %}
57+
|{{gene}}|
58+
{% endfor %}
59+
{% endif %}
60+
61+
---
62+
63+
[TRANSFERRED ANNOTATIONS.]: #
64+
65+
{% if 'transferred_annotations' in annotation %}
66+
67+
**Transferred annotations:**
68+
69+
| Transferred cell label | Source taxonomy | Source node accession | Algorithm name | Comment |
70+
|------------------------|-----------------|-----------------------|----------------|---------|
71+
{% for at in annotation.transferred_annotations %}
72+
|{{at.transferred_cell_label}}|{{at.source_taxonomy}}|[{{at.source_node_accession}}]({{at.purl_base}}{{at.source_node_accession|replace(":", "_")}})|{{at.algorithm_name}}|{{at.comment}}|
73+
{% endfor %}
74+
{% endif %}
75+
76+
[AUTHOR ANNOTATION FIELDS.]: #
77+
78+
{% if 'author_annotation_fields' in annotation %}
79+
80+
**Author annotation fields:**
81+
82+
| Author annotation | Value |
83+
|-------------------|-------|
84+
{% for key, value in annotation.author_annotation_fields.items() %}
85+
|{{key}}|{{value}}|
86+
{% endfor %}
87+
{% endif %}

resources/taxonomy_template.md

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
## {{cas.title}}
2+
3+
{{cas.description}}
4+
5+
---
6+
7+
**Matrix File ID:** {{cas.matrix_file_id}}
8+
9+
**Cell Annotation URL:** {{cas.cellannotation_url}}
10+
11+
**Author name:** {{cas.author_name}}
12+
13+
**Author contact:** {{cas.author_contact}}
14+
15+
**Author orcid:** {{cas.orcid}}
16+
17+
{% if 'author_list' in cas %}
18+
**Author list:** {{cas.author_list}}
19+
{% endif %}
20+
21+
---
22+
23+
**Cell Annotation Schema Version:** {{cas.cellannotation_schema_version}}
24+
25+
**Cell Annotation Timestamp:** {{cas.cellannotation_timestamp}}
26+
27+
**Cell Annotation Version:** {{cas.cellannotation_version}}
28+
29+
---
30+
31+
**Labelsets:**
32+
33+
| Name | Description | Annotation Method | Rank |
34+
|------|-------------|-------------------|------|
35+
{% for labelset in cas.labelsets %}
36+
|{{labelset.name}}|{{labelset.description}}|{{labelset.annotation_method}}|{{labelset.rank}}|
37+
{% endfor %}

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
setup(
1010
name="tdta",
11-
version="0.1.0.dev17",
11+
version="0.1.0.dev18",
1212
description="The aim of this project is to provide taxonomy development tools custom actions.",
1313
long_description=README,
1414
long_description_content_type="text/markdown",

src/tdta/__main__.py

+14
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from tdta.tdt_export import export_cas_data
55
from tdta.anndata_export import export_anndata
66
from tdta.version_control import git_update_local
7+
from tdta.documentation import generate_documentation
78

89

910
def main():
@@ -14,6 +15,7 @@ def main():
1415
create_save_operation_parser(subparsers)
1516
create_anndata_operation_parser(subparsers)
1617
create_merge_operation_parser(subparsers)
18+
create_docs_operation_parser(subparsers)
1719

1820
args = parser.parse_args()
1921

@@ -31,6 +33,8 @@ def main():
3133
export_anndata(args.database, args.json, args.output, cache_folder_path)
3234
elif args.action == "merge":
3335
git_update_local(str(args.project), str(args.message))
36+
elif args.action == "docs":
37+
generate_documentation(args.database, args.output)
3438

3539

3640
def create_purl_operation_parser(subparsers):
@@ -76,5 +80,15 @@ def create_merge_operation_parser(subparsers):
7680
parser_purl.add_argument('-m', '--message', required=True, help="Commit message.")
7781

7882

83+
def create_docs_operation_parser(subparsers):
    """
    Register the ``docs`` sub-command that generates the taxonomy documentation.

    Parameters:
        subparsers: Sub-parser collection (as returned by ``ArgumentParser.add_subparsers``)
            that the new command is added to.
    """
    # The command name has to be "docs": main() selects the documentation branch with
    # ``args.action == "docs"``, so any other name leaves that branch unreachable.
    parser_docs = subparsers.add_parser("docs", add_help=False,
                                        description="The documentation generation parser",
                                        help="Generates the taxonomy github pages docs.")
    parser_docs.add_argument('-db', '--database', action='store', type=pathlib.Path, required=True,
                             help="Database file path.")
    parser_docs.add_argument('-o', '--output', action='store', type=pathlib.Path, required=True,
                             help="Output file path.")
91+
92+
7993
if __name__ == "__main__":
8094
main()

src/tdta/documentation.py

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import os
2+
from pathlib import Path
3+
4+
from jinja2 import Template
5+
from urllib.parse import urlparse
6+
7+
from tdta.tdt_export import db_to_cas
8+
from tdta.utils import read_project_config
9+
10+
ANNOTATIONS_TEMPLATE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../resources/annotation_template.md")
11+
TAXONOMY_TEMPLATE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../resources/taxonomy_template.md")
12+
13+
14+
def generate_documentation(sqlite_db: str, output_folder: str, project_config=None):
    """
    Generate markdown documentation for a CAS database.

    One markdown file is written per annotation (named after its cell set accession with
    ":" replaced by "_"), plus a ``taxonomy.md`` file rendered from the taxonomy template.

    Parameters:
        sqlite_db: Path to the CAS database.
        output_folder: Path to the output documentation folder. Created when missing.
        project_config: Project configuration. When None, it is read with
            ``read_project_config`` from the parent folder of ``output_folder``.
    """
    # exist_ok=True replaces the separate exists() check and is safe if the folder
    # appears between the check and the creation.
    os.makedirs(output_folder, exist_ok=True)

    cas_obj = db_to_cas(sqlite_db)
    cas = cas_obj.to_dict()
    if project_config is None:
        project_config = read_project_config(Path(output_folder).parent.absolute())
    cas = transform_cas(cas, project_config)

    annotation_template = read_jinja_template(ANNOTATIONS_TEMPLATE)
    for annotation in cas["annotations"]:
        rendered_file = annotation_template.render(annotation=annotation, metadata=cas)
        annotation_file_name = annotation["cell_set_accession"].replace(":", "_")

        # Explicit UTF-8 so the output does not depend on the platform's locale encoding.
        with open(os.path.join(output_folder, annotation_file_name + ".md"), "w", encoding="utf-8") as fh:
            fh.write(rendered_file)

    taxonomy_template = read_jinja_template(TAXONOMY_TEMPLATE)
    rendered_file = taxonomy_template.render(cas=cas)
    with open(os.path.join(output_folder, "taxonomy.md"), "w", encoding="utf-8") as fh:
        fh.write(rendered_file)
43+
44+
45+
def transform_cas(cas, project_config):
    """
    Enrich the CAS dictionary with the extra fields used for visualisation.

    Parameters:
        cas: CAS dictionary; it is modified by the helper calls below.
        project_config: Project configuration; its "id" entry is used to build the purls.

    Returns:
        The same ``cas`` object that was passed in.
    """
    project_id = project_config["id"]
    add_purl(cas, project_id)
    add_parents(cas)
    transform_annotation_transfer(cas)
    return cas
54+
55+
56+
def transform_annotation_transfer(cas):
    """
    Add a ``purl_base`` entry to every transferred annotation in ``cas``.

    The taxonomy id is taken from the second-to-last segment of the ``source_taxonomy``
    URL path, and the purl base is rebuilt from that URL's scheme and host.
    Annotations without a "transferred_annotations" entry are left untouched.
    """
    for annotation in cas["annotations"]:
        for transfer in annotation.get("transferred_annotations", ()):
            source = urlparse(transfer["source_taxonomy"])
            # e.g. ".../taxonomy/<id>/<file>" -> "<id>"
            taxonomy_id = source.path.split('/')[-2]
            transfer["purl_base"] = f"{source.scheme}://{source.netloc}/taxonomy/{taxonomy_id}#"
65+
66+
67+
def add_purl(cas, project_id):
    """
    Set the purl base of the taxonomy on ``cas``.

    ``cas["purl_base"]`` is always overwritten; ``cas["cellannotation_url"]`` is only
    filled in when the key is not present yet.
    """
    taxonomy_root = f"https://purl.brain-bican.org/taxonomy/{project_id}"
    cas["purl_base"] = f"{taxonomy_root}#"
    cas.setdefault("cellannotation_url", f"{taxonomy_root}/{project_id}.json")
71+
72+
73+
def add_parents(cas):
    """
    Store, on every annotation, the list of its ancestor accessions under "parents".

    The lists come from ``build_hierarchy`` and are looked up by the annotation's
    "cell_set_accession".
    """
    annotations = cas["annotations"]
    lineage_by_accession = build_hierarchy(annotations)
    for entry in annotations:
        entry["parents"] = lineage_by_accession[entry["cell_set_accession"]]
77+
78+
79+
def build_hierarchy(annotations):
    """
    Build a hierarchy of cell sets.

    Parameters:
        annotations: List of annotation dicts. Each one has a "cell_set_accession" and
            optionally a "parent_cell_set_accession".

    Returns:
        Dict whose keys are cell set accessions and whose values are lists of parent cell set
        accessions ordered from highest to lowest. A parent accession that has no matching
        annotation ends the chain and is not included in the list.
    """
    annotation_dict = {annotation['cell_set_accession']: annotation for annotation in annotations}
    hierarchy = {}

    for annotation in annotations:
        cell_set_accession = annotation['cell_set_accession']
        lineage = []  # collected from the direct parent upwards, reversed at the end
        # Accessions already on the current chain. Guards against cyclic or self-referencing
        # parent links, which would otherwise make the walk run forever.
        visited = {cell_set_accession}
        current = annotation
        while 'parent_cell_set_accession' in current:
            parent_accession = current['parent_cell_set_accession']
            parent_annotation = annotation_dict.get(parent_accession)
            if not parent_annotation or parent_accession in visited:
                break
            lineage.append(parent_accession)
            visited.add(parent_accession)
            current = parent_annotation
        lineage.reverse()
        hierarchy[cell_set_accession] = lineage

    return hierarchy
101+
102+
103+
def read_jinja_template(template_path):
    """
    Read Jinja template from file.

    Parameters:
        template_path: Path of the template file.

    Returns:
        A ``jinja2.Template`` built from the file content with ``trim_blocks`` enabled.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's locale encoding.
    with open(template_path, 'r', encoding='utf-8') as file:
        template = Template(file.read(), trim_blocks=True)
    return template
110+

src/tdta/tdt_export.py

+19-15
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,7 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
3232
:param output_file: output json path
3333
:param dataset_cache_folder: anndata cache folder path
3434
"""
35-
cta = CellTypeAnnotation("", list(), "")
36-
37-
cas_tables = get_table_names(sqlite_db)
38-
for table_name in cas_tables:
39-
if table_name == "metadata":
40-
parse_metadata_data(cta, sqlite_db, table_name)
41-
elif table_name == "annotation":
42-
parse_annotation_data(cta, sqlite_db, table_name)
43-
elif table_name == "labelset":
44-
parse_labelset_data(cta, sqlite_db, table_name)
45-
elif table_name == "annotation_transfer":
46-
parse_annotation_transfer_data(cta, sqlite_db, table_name)
47-
# elif table_name == "review":
48-
# # don't export reviews to the CAS json for now
49-
# parse_review_data(cta, sqlite_db, table_name)
35+
cta = db_to_cas(sqlite_db)
5036

5137
project_config = read_project_config(Path(output_file).parent.absolute())
5238

@@ -72,6 +58,24 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
7258
return cta
7359

7460

61+
def db_to_cas(sqlite_db):
    """
    Build a CellTypeAnnotation object from the tables of a sqlite database.

    Each recognised table is handed to its parser together with the object being built;
    tables with any other name are ignored.

    :param sqlite_db: db file path
    :return: the populated CellTypeAnnotation object
    """
    # The "review" table is intentionally not listed: reviews are not exported
    # to the CAS json for now.
    table_parsers = {
        "metadata": parse_metadata_data,
        "annotation": parse_annotation_data,
        "labelset": parse_labelset_data,
        "annotation_transfer": parse_annotation_transfer_data,
    }

    cta = CellTypeAnnotation("", list(), "")
    for table_name in get_table_names(sqlite_db):
        parse_table = table_parsers.get(table_name)
        if parse_table is not None:
            parse_table(cta, sqlite_db, table_name)
    return cta
77+
78+
7579
def ensure_file_size_limit(file_path):
7680
"""
7781
Checks if the file size exceeds the GitHub size limit and zips the file if needed.

src/test/generate_docs_test.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import unittest
2+
import os
3+
import shutil
4+
import json
5+
6+
from tdta.documentation import generate_documentation, build_hierarchy
7+
8+
TEST_DATA_FOLDER = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_data/")
9+
TEST_DB = os.path.join(TEST_DATA_FOLDER, "nanobot_siletti_nn_with_at.db")
10+
TEST_OUTPUT = os.path.join(TEST_DATA_FOLDER, "docs/")
11+
12+
13+
class GenerateDocsTestCase(unittest.TestCase):
    """Tests for the markdown documentation generator."""

    def setUp(self):
        # Remove output left over from a previous run so each test starts clean.
        if os.path.exists(TEST_OUTPUT):
            shutil.rmtree(TEST_OUTPUT)

    def test_documentation_generation(self):
        generate_documentation(TEST_DB, TEST_OUTPUT, project_config={"id": "CS202210140"})
        self.assertTrue(os.path.exists(TEST_OUTPUT))

        # generate_documentation always writes a taxonomy.md summary into the output folder.
        self.assertTrue(os.path.isfile(os.path.join(TEST_OUTPUT, "taxonomy.md")))

    def test_hierarchy_breadcrumb(self):
        # Resolve the fixture through TEST_DATA_FOLDER so the test does not depend on the
        # current working directory.
        with open(os.path.join(TEST_DATA_FOLDER, "CS202210140.json")) as f:
            siletti = json.load(f)

        hierarchy = build_hierarchy(siletti["annotations"])
        self.assertEqual(386, len(list(hierarchy.keys())))

        subcluster_parents = hierarchy["CS202210140_3490"]
        self.assertEqual(2, len(subcluster_parents))
        self.assertEqual("CS202210140_469", subcluster_parents[0])
        self.assertEqual("CS202210140_51", subcluster_parents[1])

        cluster_parents = hierarchy["CS202210140_6"]
        self.assertEqual(1, len(cluster_parents))
        self.assertEqual("CS202210140_464", cluster_parents[0])

        supercluster_parents = hierarchy["CS202210140_465"]
        self.assertEqual(0, len(supercluster_parents))
43+
44+
45+
if __name__ == '__main__':
46+
unittest.main()

0 commit comments

Comments
 (0)