diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 619d934..f43c74c 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -33,6 +33,6 @@ jobs: - name: Build docs run: | - mkdocs build --strict + make build # TODO: Internal link check diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 6dae4dc..317aea9 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,6 +6,9 @@ build: os: ubuntu-24.04 tools: python: "3" + jobs: + pre_build: + - make pre-build mkdocs: configuration: mkdocs.yml diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..05f90b7 --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ + +pre-build: + python ./plugins/create_category_pages.py + +build: pre-build + mkdocs build --strict + +serve: pre-build + mkdocs serve + +.PHONY: clean +clean: + rm -f docs/categories/* + rm -rf site/* diff --git a/README.md b/README.md index 74bf6a7..58b278c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # UK TRE Glossary -[![Build](https://github.com/manics/uktre-glossary-rtd/actions/workflows/workflow.yaml/badge.svg)](https://github.com/manics/uktre-glossary-rtd/actions/workflows/workflow.yaml) +[![Build](https://github.com/manics/uktre-glossary/actions/workflows/workflow.yml/badge.svg)](https://github.com/manics/uktre-glossary/actions/workflows/workflow.yml) +[![readthedocs](https://app.readthedocs.org/projects/uktre-glossary/badge/?version=latest)](https://uktre-glossary.readthedocs.io/) **⚠️⚠️⚠️⚠️⚠️ Under development ⚠️⚠️⚠️⚠️⚠️** diff --git a/assets/uktre-glossary.yaml b/assets/uktre-glossary.yaml index 0ff5961..570d1e5 100644 --- a/assets/uktre-glossary.yaml +++ b/assets/uktre-glossary.yaml @@ -1,22 +1,3 @@ -categories: - - Analysis - - Computing - - Data Management - - Data in General - - Data in general - - Health Research - - Health Services & Health Data - - Identifiability - - Management - - Other - - Processes - - Research Management - - Risk Management - - Running and Overseeing Research - - Running and overseeing research - - Security Management - - Special aspects in the NHS Context - - UK law and rules glossary: - term: AAI tags: @@ -397,7 +378,7 @@ glossary: A person’s health records that are held digitally on a computer (as opposed to on paper). Also known as an electronic patient record (EPR). - term: Ethical approvals tags: - - Running and Overseeing Research + - Running and overseeing research definition: |- Ethical approvals are like getting the green light from a group of experts who make sure that research is done in a proper and respectful way. They ensure that participants' rights are protected and everything is conducted responsibly. It's like having a permission slip before starting the research to ensure everything is fair and safe. - term: European Union (EU) General Data Protection Regulation (GDPR) @@ -546,7 +527,7 @@ glossary: For example: joining a health dataset with an employment dataset using a common key based on individual names and addresses. - term: Longitudinal Dataset tags: - - Data in General + - Data in general definition: |- A collection of data related to the same group of people over a long time to see how things change. This may involve asking the same questions at different ages. - term: Machine Learning (ML) diff --git a/docs/categories/.gitignore b/docs/categories/.gitignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/docs/categories/.gitignore @@ -0,0 +1 @@ +* diff --git a/plugins/create_category_pages.py b/plugins/create_category_pages.py new file mode 100755 index 0000000..686780f --- /dev/null +++ b/plugins/create_category_pages.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +from pathlib import Path +import re +import yaml + + +def _slugify(s: str): + return re.sub(r"\W", "-", s.lower()) + +ROOT = Path(__file__).parent / ".." + +TEMPLATE = """ +# %CATEGORY% + +{{ read_yaml("uktre-glossary.yaml", record_path="glossary", category="%CATEGORY%") }} +""" + +with open(ROOT / "assets" / "uktre-glossary.yaml") as f: + data = yaml.safe_load(f) +categories = set(t for term in data["glossary"] for t in term["tags"]) + +# Check categories don't have inconsistent names +slugs = {} +for category in categories: + slug = _slugify(category) + if slug in slugs: + raise ValueError(f"Category has inconsistent naming: '{slugs[slug]}' '{category}'") + slugs[slug] = category + +for slug, category in sorted(slugs.items()): + print(f"{category:<40} {slug}") + with open(ROOT / "docs" / "categories" / f"{slug}.md", "w") as f: + f.write(TEMPLATE.replace("%CATEGORY%", category)) diff --git a/plugins/mkdocs-uktre-glossary-plugin/src/mkdocs_uktre_glossary_plugin/readers.py b/plugins/mkdocs-uktre-glossary-plugin/src/mkdocs_uktre_glossary_plugin/readers.py index 386f1f3..8dd9d4a 100644 --- a/plugins/mkdocs-uktre-glossary-plugin/src/mkdocs_uktre_glossary_plugin/readers.py +++ b/plugins/mkdocs-uktre-glossary-plugin/src/mkdocs_uktre_glossary_plugin/readers.py @@ -23,7 +23,7 @@ def link_urls(s: str): s = re.sub(rf"(https?://[\S]+[^{trailing_punctuation}])", r"[\1](\1)", s) return s -def _crossref_terms(text): +def _crossref_terms(text, parent): # Find [...] but not [...](...) matches = re.findall(r"(\[[^]]+\])([^(]|$)", text) # Get the first capture group @@ -31,37 +31,59 @@ def _crossref_terms(text): for crossref in crossrefs: target_term = crossref[1:-1] - link_target = "#term-" + _slugify(target_term) + link_target = f"{parent}#term-{_slugify(target_term)}" link_md = f"[{target_term}]({link_target})" text = text.replace(crossref, link_md) return text -def to_glossary_html(df, **kwargs): +def to_glossary_html(df, category="", **kwargs): """ df.to_markdown() escapes some HTML, so create a HTML table ourselves """ if kwargs: raise ValueError(f"Unsupported kwargs: {kwargs}") - out = """ + # Don't show tags column if this is a single category + th_tags = "" if category else "Tags" + out = f""" - + {th_tags} """ - for row in df.itertuples(index=False): + + if category: + # Duplicate rows with multiple tags, one per tag + selected = df.explode("tags") + selected = selected[selected["tags"] == category] + else: + selected = df + + for row in selected.itertuples(index=False): anchor = "term-" + _slugify(row.term) - crossreferenced = _crossref_terms(link_urls(row.definition)) + term = escape(row.term) + + if category: + # Don't show tags column if this is a single category + tags = "" + # Need to link to top-level glossary since terms may not be in this category + parent = "../../" + else: + tags = "".join(markdown(escape(f"[{c}](categories/{_slugify(c)})")) for c in row.tags) + tags = f"" + parent = "" + + crossreferenced = _crossref_terms(link_urls(row.definition), parent) definition = markdown(escape(crossreferenced)) row = f""" - - - + + {tags} + """ out += row diff --git a/requirements.in b/requirements.in index 6790d9d..1e09d33 100644 --- a/requirements.in +++ b/requirements.in @@ -1,3 +1,3 @@ -file:plugins/mkdocs-uktre-glossary-plugin#egg=mkdocs-uktre-glossary-plugin +-e file:plugins/mkdocs-uktre-glossary-plugin#egg=mkdocs-uktre-glossary-plugin mkdocs mkdocs-material diff --git a/requirements.txt b/requirements.txt index f8f4b6f..56b3d31 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,8 @@ # # pip-compile # +-e file:plugins/mkdocs-uktre-glossary-plugin#egg=mkdocs-uktre-glossary-plugin + # via -r requirements.in babel==2.17.0 # via mkdocs-material backrefs==5.8 @@ -51,8 +53,6 @@ mkdocs-material-extensions==1.3.1 # via mkdocs-material mkdocs-table-reader-plugin==3.1.0 # via mkdocs-uktre-glossary-plugin -file:plugins/mkdocs-uktre-glossary-plugin#egg=mkdocs-uktre-glossary-plugin - # via -r requirements.in numpy==2.2.4 # via pandas packaging==24.2
TermTagsDefinition
{tags}
{escape(row.term)}{escape(", ".join(row.tags))}{markdown(definition)}{term}{definition}