Skip to content

Commit 3bb02f9

Browse files
committed
wip: create utils to hold commonly used functionality
1 parent dc81074 commit 3bb02f9

2 files changed

Lines changed: 162 additions & 1 deletion

File tree

quadriga/metadata/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99
"create_bibtex",
1010
"update_citation_cff",
1111
"extract_from_config_toc",
12-
"run_all"
12+
"run_all",
13+
"utils"
1314
]
1415

1516
# Import the modules to make their functions available
1617
from . import create_bibtex
1718
from . import update_citation_cff
1819
from . import extract_from_config_toc
1920
from . import run_all
21+
from . import utils

quadriga/metadata/utils.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Common utility functions for metadata management in the Quadriga Book Template.
4+
This module provides functionality that is shared across the different metadata scripts.
5+
"""
6+
7+
import yaml
8+
import os
9+
import re
10+
import json
11+
from pathlib import Path
12+
from datetime import datetime
13+
14+
# ---- File Path Handling ----
15+
16+
def get_repo_root():
    """
    Locate the repository root relative to this module's own location.

    The module is expected to live in ``<repo>/quadriga/metadata/``, so the root is
    found by walking up from this file's absolute path.

    Returns:
        str: Absolute path to the repository root
    """
    location = os.path.abspath(__file__)
    # Three steps up: this file -> quadriga/metadata/ -> quadriga/ -> repository root.
    for _ in range(3):
        location = os.path.dirname(location)
    return location
27+
28+
def get_file_path(relative_path, repo_root=None):
    """
    Build the path of a repository file from its root-relative location.

    Args:
        relative_path (str): Path of the file relative to the repository root
        repo_root (str, optional): Repository root to join against. When None, the root
            is looked up with get_repo_root()

    Returns:
        str: ``repo_root`` joined with ``relative_path``
    """
    base_dir = get_repo_root() if repo_root is None else repo_root
    return os.path.join(base_dir, relative_path)
42+
43+
# ---- YAML Handling ----
44+
45+
def load_yaml_file(file_path):
    """
    Read a YAML file and hand back the parsed Python object.

    Any failure (missing file, unreadable content, invalid YAML, ...) is reported on
    stdout instead of being raised.

    Args:
        file_path (str): Path to the YAML file

    Returns:
        dict/list: Parsed contents of the YAML file, or None if an error occurs
    """
    parsed = None
    try:
        with open(file_path, mode='r', encoding='utf-8') as stream:
            parsed = yaml.safe_load(stream)
    except Exception as err:
        # Best-effort loader: report the problem and fall through to the None result.
        print(f"Error loading {file_path}: {err}")
    return parsed
61+
62+
def save_yaml_file(file_path, data, add_schema_comment=None):
    """
    Save a Python object as YAML to the specified file.

    The file is written in a single pass: the optional schema comment goes first,
    followed directly by the YAML dump. This avoids re-opening and rewriting the file
    just to prepend one line, and the file never exists on disk with YAML content but
    without its schema comment.

    Errors are reported on stdout rather than raised.

    Args:
        file_path (str): Path where the YAML file should be saved
        data (dict/list): Data to save
        add_schema_comment (str, optional): Schema comment to add at the start of the file
            e.g. "# yaml-language-server: $schema=quadriga-schema.json"
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            # Write the schema comment (if any) as the very first line of the file.
            if add_schema_comment:
                file.write(f"{add_schema_comment}\n")
            # sort_keys=False keeps the key order of `data`; allow_unicode=True writes
            # non-ASCII characters verbatim instead of as escape sequences.
            yaml.dump(data, file, sort_keys=False, default_flow_style=False, allow_unicode=True)

        print(f"Successfully updated {file_path}")
    except Exception as e:
        # Best-effort writer: report the failure instead of propagating it.
        print(f"Error saving to {file_path}: {e}")
86+
87+
# ---- Markdown and Jupyter Content Handling ----
88+
89+
def _find_level_one_heading(text):
    """Return the first non-empty level-one (``# ...``) heading in *text*, or None."""
    open_fence = None  # marker of the fenced code block we are currently inside, if any
    for line in text.split('\n'):
        stripped = line.strip()
        marker_match = re.match(r'`{3,}|~{3,}', stripped)
        marker = marker_match.group(0) if marker_match else None

        if open_fence is not None:
            # Inside a code block: only a bare fence of the same character that is at
            # least as long as the opening one closes it. Everything else is code.
            if (marker is not None and stripped == marker
                    and marker[0] == open_fence[0]
                    and len(marker) >= len(open_fence)):
                open_fence = None
            continue

        # A backtick fence whose remainder contains a backtick is inline code
        # (e.g. ```x```), not the start of a fenced block.
        if marker is not None and not (marker[0] == '`' and '`' in stripped[len(marker):]):
            open_fence = marker
            continue

        # Only horizontal whitespace may separate '#' from the heading text, so a
        # lone '#' line can never swallow the following line as its "heading".
        heading_match = re.match(r'#[ \t]+(.+)$', line)
        if heading_match:
            heading = heading_match.group(1).strip()
            if heading:
                return heading
    return None


def extract_first_heading(file_path):
    """
    Extract the first level-one heading from a markdown or jupyter notebook file.

    For ``.ipynb`` files the markdown cells are searched in order; for any other file
    the content is treated as markdown, with a leading YAML frontmatter block skipped.
    Lines inside fenced code blocks are ignored, so a ``# comment`` in a code sample is
    not mistaken for the title. Errors while reading or parsing the file are reported
    on stdout and lead to the filename fallback.

    Args:
        file_path (str or os.PathLike): Path to the file

    Returns:
        str: The content of the first heading, or the filename without extension if no
            heading is found
    """
    # Normalise pathlib.Path (and other PathLike) arguments so that the string
    # suffix check below works for them too.
    path = os.fspath(file_path)

    try:
        # Handle both .md and .ipynb files
        if path.endswith('.ipynb'):
            # For Jupyter notebooks, parse the JSON to find the first heading
            with open(path, 'r', encoding='utf-8') as file:
                notebook = json.load(file)

            # Look for the first markdown cell with a heading
            for cell in notebook.get('cells', []):
                if cell.get('cell_type') == 'markdown':
                    heading = _find_level_one_heading(''.join(cell.get('source', [])))
                    if heading is not None:
                        return heading
        else:
            # For markdown files
            with open(path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Skip YAML frontmatter if it exists ('^' only matches at the file start)
            content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL)

            heading = _find_level_one_heading(content)
            if heading is not None:
                return heading
    except Exception as e:
        print(f"Error extracting heading from {file_path}: {e}")

    # If we couldn't find a heading, return the filename
    return os.path.splitext(os.path.basename(path))[0]
130+
131+
# ---- Citation Handling ----
132+
133+
def format_authors_for_bibtex(authors):
    """
    Format a list of authors in the proper BibTeX format.

    Persons are rendered as "Family, Given". If only one of the two name parts is
    present, it is used on its own, without a dangling ", " separator. Authors that have
    neither part but carry a 'name' key (presumably organisations or other entities;
    verify against the citation metadata) are wrapped in braces so BibTeX keeps the name
    as a single unit. Authors without any usable name are skipped.

    Args:
        authors (list): List of author dictionaries with 'given-names' and 'family-names'
            (or 'name'). None or an empty list yields an empty string.

    Returns:
        str: Authors formatted for BibTeX, joined with " and "
    """
    formatted = []
    for author in authors or []:
        # `or ''` also covers keys that are present but set to None.
        family = str(author.get('family-names') or '').strip()
        given = str(author.get('given-names') or '').strip()

        if family and given:
            formatted.append(f"{family}, {given}")
        elif family or given:
            formatted.append(family or given)
        else:
            entity = str(author.get('name') or '').strip()
            if entity:
                # Braces stop BibTeX from splitting the name into name parts.
                formatted.append(f"{{{entity}}}")
    return " and ".join(formatted)
144+
145+
def _sanitize_key_part(text):
    """Drop every character that is not a word character or hyphen from *text*."""
    return re.sub(r'[^\w-]', '', str(text))


def generate_citation_key(authors, title, year):
    """
    Generate a citation key for BibTeX.

    The key has the form ``<family name>_<first title word>_<year>``. Whitespace and
    punctuation are removed from the family name and the title word, because characters
    such as spaces, commas and braces are not valid inside a BibTeX key. The first title
    word that is non-empty after this clean-up is used.

    Args:
        authors (list): List of author dictionaries; only the 'family-names' of the
            first entry is used. Falls back to 'Unknown' if unavailable
        title (str): Title of the work. Falls back to 'Untitled' if empty or None
        year (str): Year of publication, inserted as given

    Returns:
        str: Citation key
    """
    first_author = authors[0] if authors else {}
    # `or` covers both a missing key and an empty/None value; the second `or` covers
    # names that consist only of characters removed by the clean-up.
    family = _sanitize_key_part(first_author.get('family-names') or 'Unknown') or 'Unknown'

    title_words = str(title).split() if title else []
    cleaned_words = (_sanitize_key_part(word) for word in title_words)
    first_word = next((word for word in cleaned_words if word), 'Untitled')

    return f"{family}_{first_word}_{year}"

0 commit comments

Comments
 (0)