Skip to content

Commit bf9b21e

Browse files
committed
add comments and helper functions for easier future maintenance
1 parent fbca795 commit bf9b21e

File tree

1 file changed

+86
-49
lines changed

1 file changed

+86
-49
lines changed

scripts/hooks.py

Lines changed: 86 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import pandas as pd
44
import yaml
55

6-
# Data models for the documentation site: filename -> page title
6+
# --- Configuration ---
7+
8+
# Data models to display on the documentation site: filename -> page title
79
DATA_MODELS = {
810
"dataset": "Dataset",
911
"sharingPlans": "Dataset Sharing Plan",
@@ -16,6 +18,7 @@
1618
"tool": "Tool",
1719
}
1820

21+
# Columns to render for the full field reference tables.
1922
COLS_TO_RENDER = [
2023
"Attribute",
2124
"Description",
@@ -24,30 +27,56 @@
2427
"Examples",
2528
]
2629

27-
MAPPING_FILE = join("modules", "mapping.yaml")
28-
NAVIGATION_FILE = "nav.yml"
30+
MAPPING_FILENAME = "mapping.yaml"
31+
NAVIGATION_FILENAME = "nav.yml"
32+
ANNOTATIONS_FILENAME = "annotationProperty.csv"
33+
EXAMPLE_FILENAME = "exampleColumn.csv"
34+
REFERENCE_FILENAME = "reference.csv"
35+
36+
37+
# --- Helper Functions ---
38+
def _create_markdown_link(attribute: str, model: str) -> str:
39+
"""Create markdown link to list of valid values for the given attribute."""
40+
link_prefix = f"../valid_values/{model}.md#attribute"
41+
slug = attribute.lower().replace(" ", "-")
42+
return f"[{attribute}]({link_prefix}-{slug})"
43+
44+
45+
def _escape_backslashes(text: str) -> str:
46+
"""Escape backslashes for proper markdown rendering."""
47+
return text.replace(r"\\", r"\\\\")
2948

3049

31-
def generate_linked_model(model: str):
32-
"""Generates a CSV with linked attributes to standard term definitions.
50+
def _format_validation_rules(rules: str) -> str:
51+
"""Format validation rules, replacing empty strings with '_None_'."""
52+
return _escape_backslashes(rules) if rules else "_None_"
3353

34-
Desired markdown: render model template so that
54+
55+
# --- Core logic functions ---
56+
def generate_linked_table(model: str):
57+
"""Generate CSV with linked attributes to list of valid values.
58+
59+
Desired markdown look: render model reference table so that
3560
- it is known which attributes require valid values
3661
- clicking on attribute will direct to valid values table
3762
"""
3863
parent = join("modules", model)
39-
annotations_file = join(parent, "annotationProperty.csv")
40-
example_file = join(parent, "exampleColumn.csv")
41-
template_file = join(parent, "template.csv")
64+
annotations_file = join(parent, ANNOTATIONS_FILENAME)
65+
example_file = join(parent, EXAMPLE_FILENAME)
66+
reference_file = join(parent, REFERENCE_FILENAME)
4267

4368
# Read both annotation properties and examples
4469
annotation_df = pd.read_csv(annotations_file, quoting=1).fillna("")
4570
examples_df = pd.read_csv(example_file, quoting=1).fillna("")
4671

4772
# First select only the columns we want from annotation_df
48-
df = annotation_df[
49-
["Attribute", "Description", "Required", "Validation Rules", "Valid Values"]
50-
]
73+
df = annotation_df[[
74+
"Attribute",
75+
"Description",
76+
"Required",
77+
"Validation Rules",
78+
"Valid Values",
79+
]]
5180

5281
# Then add the Example column and rename it to Examples
5382
df = df.merge(
@@ -57,71 +86,79 @@ def generate_linked_model(model: str):
5786
).rename(columns={"Example": "Examples"})
5887

5988
# If attribute has a list of valid values, create a link.
60-
for _, row in df[df["Valid Values"].ne("")].iterrows():
61-
attr_link = (
62-
"["
63-
+ row["Attribute"]
64-
+ (
65-
f"](../valid_values/{model}.md#attribute-"
66-
f"{row['Attribute'].lower().replace(' ', '-')})"
67-
)
68-
)
69-
df.at[_, "Attribute"] = attr_link
70-
71-
# For any validation rules with a regex, replace `\` with `\\`
72-
# for proper rendering.
73-
df["Validation Rules"] = df["Validation Rules"].replace(r"\\", r"\\\\", regex=True)
89+
df["Attribute"] = df.apply(
90+
lambda row: (
91+
_create_markdown_link(row["Attribute"], model)
92+
if row["Valid Values"]
93+
else row["Attribute"]
94+
),
95+
axis=1,
96+
)
7497

75-
# Indicate "None" if there are no validation rules for the attribute.
76-
df.loc[df["Validation Rules"] == "", "Validation Rules"] = "_None_"
77-
78-
df[COLS_TO_RENDER].to_csv(template_file, index=False)
98+
# Fix any remaining rendering issues, such as escaping backslashes,
99+
# then output the table as CSV.
100+
df["Validation Rules"] = df["Validation Rules"].apply(_format_validation_rules)
101+
df[COLS_TO_RENDER].to_csv(reference_file, index=False)
79102

80103

81104
def generate_valid_values_markdown(model: str):
82-
"""Generates Markdown page for standard terms of the given data model."""
83-
parent = join("docs", "valid_values")
84-
with open(MAPPING_FILE) as f, \
85-
open(join(parent, f"{model}.md"), "w") as md:
105+
"""Generate docs page for standard terms of the given data model."""
106+
dest_parent_dir = join("docs", "valid_values")
107+
108+
with open(join("modules", MAPPING_FILENAME)) as f, \
109+
open(join(dest_parent_dir, f"{model}.md"), "w") as md:
86110
mapping = yaml.safe_load(f)
111+
112+
# Create a section in the docs page for each attribute that has a list
113+
# of standard terms.
87114
for attribute in mapping[model]:
88115
name = attribute.get("name")
89-
file_src = attribute.get("src")
116+
valid_values_src = attribute.get("src")
117+
90118
md.write(f"## Attribute: `{name}`\n\n")
91119
md.write(
92120
'<div style="max-height:650px; overflow-x: hidden; overflow-y: auto;">\n\n'
93121
)
94122
md.write(
95123
"{{ read_csv('"
96-
+ file_src
124+
+ valid_values_src
97125
+ "', header=0, names=['Valid Value','Description'], usecols=['Valid Value','Description'], tablefmt='html') }}\n\n"
98126
)
99127
md.write("</div>\n\n\n")
100128

101129

102-
def on_pre_build(config, **kwargs) -> None:
103-
"""Pre-process data model and valid values files."""
104-
for model in DATA_MODELS.keys():
105-
generate_linked_model(model)
130+
# --- MkDocs event hooks ---
131+
def on_pre_build(config):
    """Generate the per-model docs inputs for every configured data model.

    For each key in DATA_MODELS this calls, in order:
    - generate_linked_table(), which writes the model's reference table CSV
    - generate_valid_values_markdown(), which writes the model's docs page
      of standard terms

    ``config`` is accepted but not used here.
    """
    for model_name in DATA_MODELS:
        generate_linked_table(model_name)
        generate_valid_values_markdown(model_name)
107141

108142

109-
def on_files(_, config) -> None:
110-
"""Updates the documentation site navigation.
143+
def on_files(_, config):
144+
"""Update docs site navigation after all files are gathered and generated.
111145
112-
This is a hacky solution to updating the nav configuration
113-
to include the automated markdown pages created by the
114-
generate_valid_values_markdown() function.
146+
!!! note
147+
This is a hacky solution to updating config.nav to include the auto-
148+
generated markdown pages created by generate_valid_values_markdown().
115149
"""
116-
with open(NAVIGATION_FILE) as f:
150+
with open(NAVIGATION_FILENAME) as f:
117151
nav_mapping = yaml.safe_load(f)
118-
config["nav"] = nav_mapping
119152

153+
# Initial setup for config.nav.
154+
config["nav"] = nav_mapping
120155
config["nav"]["Standard Terms"] = {
121156
"All terms": "valid_values/all_terms.md",
122157
"Terms by model": [],
123158
}
124-
for model, title in DATA_MODELS.items():
159+
160+
# Dynamically add valid_values docs page for each data model to config.nav.
161+
for model, page_title in DATA_MODELS.items():
125162
config["nav"]["Standard Terms"]["Terms by model"].append(
126-
{title: join("valid_values", model + ".md")}
163+
{page_title: join("valid_values", model + ".md")}
127164
)

0 commit comments

Comments
 (0)