Skip to content

Commit bf8b99a

Browse files
committed
add case insensitive redirects for all _materials
1 parent 237d352 commit bf8b99a

File tree

5 files changed

+124
-85
lines changed

5 files changed

+124
-85
lines changed

.github/workflows/fetch-remark-metadata.yml .github/workflows/site-preprocess.yml

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Copy REMARK metadata
1+
name: Preprocessing to build website
22
on:
33
push:
44
branches:
@@ -22,8 +22,10 @@ jobs:
2222
run: |
2323
python -m pip install --upgrade pip
2424
python -m pip install pyyaml==6.0.0
25-
- name: copy metadata
25+
- name: copy REMARK metadata
2626
run: python scripts/populate_materials.py
27+
- name: case insensitive redirects
28+
run: python scripts/caseinsensitive_redirects.py
2729
- name: update gh-pages branch
2830
run: |
2931
git config --global user.name github-actions

_layouts/material.html

+2
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,8 @@ <h2 class="heading">Metadata</h2>
349349
and item != 'title'
350350
and item != 'slug'
351351
and item != 'ext'
352+
and item != 'redirect_from'
353+
and item != 'redirect_to'
352354
%}
353355
{% if page[item] != '' %}
354356
<tr>

scripts/caseinsensitive_redirects.py

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from pathlib import Path
2+
from itertools import product
3+
from io import StringIO
4+
from re import fullmatch, split as re_split
5+
6+
from yaml import safe_load, dump as yaml_dump
7+
8+
def parse_yaml_header(f):
    """Read the YAML front matter from an open text stream.

    Lines are skipped up to and including the first ``---`` delimiter. The
    lines that follow, up to (but not including) the next ``---``, are
    parsed as YAML. Iteration stops right after the closing delimiter, so a
    subsequent ``f.read()`` returns the remainder of the document.

    Args:
        f: An iterable text stream (such as an open file) yielding lines.

    Returns:
        A ``(metadata, f)`` tuple. ``metadata`` is the parsed header, or an
        empty dict when the header is empty or no header was found; ``f`` is
        the same stream object that was passed in.
    """
    header_lines = []
    inside_header = False
    for line in f:
        if line.strip() == '---':
            if inside_header:
                # closing delimiter: leave the rest of the stream unread
                break
            inside_header = True
        elif inside_header:
            header_lines.append(line)

    # safe_load yields None for an empty document; hand callers a mapping so
    # dict operations such as setdefault()/update() keep working.
    return (safe_load(''.join(header_lines)) or {}), f
21+
22+
def generate_case_combinations(name):
    """Yield spellings of *name* with its uppercase runs lower- or uppercased.

    The name is cut into alternating pieces: maximal runs of ASCII uppercase
    letters and everything in between. Each uppercase run contributes two
    alternatives (all lowercase first, then all uppercase); every other
    piece is kept verbatim. One string is yielded per combination of
    alternatives, so the original spelling is always among the results.
    """
    pattern = '[A-Z]+'

    # The capture group makes the split keep the uppercase runs themselves.
    choices = []
    for piece in re_split(f'({pattern})', name):
        if fullmatch(pattern, piece):
            choices.append([piece.lower(), piece.upper()])
        else:
            choices.append([piece])

    # Cartesian product over the per-piece alternatives, glued back together.
    for picked in product(*choices):
        yield ''.join(picked)
35+
36+
if __name__ == '__main__':
    # For every Markdown file in _materials/, extend the front matter's
    # redirect_from list with the case variants of the file stem, then
    # rewrite the file in place (front matter followed by the original body).
    repo_root = Path(__file__).parents[1]

    for path in repo_root.glob('_materials/*.md'):
        with open(path, encoding='utf-8') as f:
            metadata, f = parse_yaml_header(f)
            body = f.read()

        # An empty front matter parses to None; start from an empty mapping.
        metadata = metadata or {}

        # redirect_from may be missing or null, and hand-written front matter
        # may hold a single string instead of a list; normalise to a list.
        existing = metadata.get('redirect_from') or []
        if isinstance(existing, str):
            existing = [existing]
        redirects = list(existing)

        # Skip the canonical stem and anything already listed, so running
        # the script again does not accumulate duplicate entries.
        for variant in generate_case_combinations(path.stem):
            if variant != path.stem and variant not in redirects:
                redirects.append(variant)
        metadata['redirect_from'] = redirects

        with open(path, 'w', encoding='utf-8') as f:
            f.write('---\n')
            yaml_dump(metadata, f, default_flow_style=False)
            f.write('---\n')
            f.write(body)

scripts/populate_materials.py

-83
This file was deleted.

scripts/populate_remarks.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from concurrent.futures import ThreadPoolExecutor, as_completed
2+
from pathlib import Path
3+
from io import StringIO
4+
from itertools import product
5+
from re import finditer
6+
from subprocess import run
7+
from tempfile import TemporaryDirectory
8+
9+
from yaml import safe_load, dump as yaml_dump
10+
11+
def parse_yaml_header(f):
    """Read the YAML front matter from an open text stream.

    Lines are skipped up to and including the first ``---`` delimiter. The
    lines that follow, up to (but not including) the next ``---``, are
    parsed as YAML. Iteration stops right after the closing delimiter, so a
    subsequent ``f.read()`` returns the remainder of the document.

    Args:
        f: An iterable text stream (such as an open file) yielding lines.

    Returns:
        A ``(metadata, f)`` tuple. ``metadata`` is the parsed header, or an
        empty dict when the header is empty or no header was found; ``f`` is
        the same stream object that was passed in.
    """
    header_lines = []
    inside_header = False
    for line in f:
        if line.strip() == '---':
            if inside_header:
                # closing delimiter: leave the rest of the stream unread
                break
            inside_header = True
        elif inside_header:
            header_lines.append(line)

    # safe_load yields None for an empty document; hand callers a mapping so
    # dict operations such as setdefault()/update() keep working.
    return (safe_load(''.join(header_lines)) or {}), f
24+
25+
repo_root = Path(__file__).parents[1]

if __name__ == '__main__':
    # Clone the REMARK catalogue, clone every repository it lists, and write
    # one _materials/<name>.md per repository that ships a CITATION.cff:
    # the CITATION.cff data (overridden by REMARK.md front matter, if any)
    # becomes the front matter and the REMARK.md body becomes the body.
    with TemporaryDirectory() as d:
        tmpdir = Path(d)
        # Nothing below can work without the catalogue, so fail loudly here
        # instead of silently producing no output.
        run(
            ['git', 'clone', '--filter=tree:0', 'https://github.com/econ-ark/REMARK'],
            cwd=tmpdir,
            check=True,
        )

        remotes = {}
        for yml_path in tmpdir.glob('REMARK/REMARKs/*yml'):
            with open(yml_path, encoding='utf-8') as f:
                metadata = safe_load(f)
            remotes[yml_path.stem] = metadata['remote']

        # Clone into a directory named after the catalogue entry: without an
        # explicit target git names the checkout after the URL, which need
        # not match the name used for the lookups below. Individual clone
        # failures stay non-fatal; such entries are skipped further down.
        with ThreadPoolExecutor(4) as pool:
            futures = {
                name: pool.submit(
                    run, ['git', 'clone', '--sparse', uri, name], cwd=tmpdir
                )
                for name, uri in remotes.items()
            }

        for name, result in futures.items():
            result.result()
            clone_dir = tmpdir / name
            cff = clone_dir / 'CITATION.cff'
            if not cff.exists():
                continue
            with open(cff, encoding='utf-8') as f:
                # an empty CITATION.cff parses to None
                remark_data = safe_load(f) or {}

            # Reset per repository so a missing REMARK.md neither raises a
            # NameError nor reuses the body of the previous repository.
            body = ''
            remark_md = clone_dir / 'REMARK.md'
            if remark_md.exists():
                with open(remark_md, encoding='utf-8') as f:
                    mdata, f = parse_yaml_header(f)
                    body = f.read()
                # front matter may be empty (None); REMARK.md keys win
                remark_data.update(mdata or {})

            with open(repo_root / '_materials' / f'{name}.md', 'w', encoding='utf-8') as f:
                f.write('---\n')
                yaml_dump(remark_data, f, default_flow_style=False)
                f.write('---\n')
                f.write(body)
65+

0 commit comments

Comments
 (0)