-
Notifications
You must be signed in to change notification settings - Fork 2.4k
/
Copy pathtoc.sh
executable file
·85 lines (78 loc) · 2.77 KB
/
toc.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env bash
set -Eeuo pipefail
self="$(basename "$0")"
usage() {
cat <<-EOU
usage: $self path/to/markdown.md
eg: $self README.md
WARNING: this will *always* clobber any path/to/markdown.md.{toc,bak} while processing; use with caution!
EOU
}
markdown="${1:-}"
if ! shift || [ ! -s "$markdown" ]; then usage >&2; exit 1; fi
# see https://gist.github.com/tianon/75e267d9137b1c2978031b66b3a98987 for an insane test case for this (with several rough edges)
jq --raw-input --null-input --raw-output '
reduce inputs as $line ({ toc: "" };
if $line | test("^```") then
.ignore |= not
else . end
| if .ignore then . else
(
$line
| capture("^(?<hash>#+)[[:space:]]*(?<heading>.*?)[[:space:]]*$")
// null
) as $cap
| if $cap then
($cap.hash | length) as $level
| .levels[$level] += 1
| .levels |= (.[range($level+1; length)] = 0)
| (
$cap.heading
| ascii_downcase
# https://github.com/thlorenz/anchor-markdown-header/blob/6b9bc1c902e48942666859fb6f795d91cbfd48e7/anchor-markdown-header.js#L33-L48
| gsub(" "; "-")
# escape codes (commented out because this is not something GitHub strips, although it *does* strip % which is not included below, so that is added here)
#| gsub("%[abcdef0-9]{2}"; ""; "i")
| gsub("%"; "")
# single chars that are removed
| gsub("[\\\\/?!:\\[\\]`.,()*\"'"'"';{}+=<>~$|#@&–—]"; "")
# CJK punctuations that are removed
| gsub("[。?!,、;:“”【】()〔〕[]﹃﹄“ ”‘’﹁﹂—…-~《》〈〉「」]"; "")
# Strip emojis (*technically* this is way too aggressive and will strip out *all* UTF-8, but 🤷)
| (split("") | map(select(utf8bytelength == 1)) | join(""))
# TODO Strip embedded markdown formatting
) as $anchor
# handle repetition (same end anchor)
| (
(.seen // []) as $seen
| first(
# this 1000 limits how many repeated headings we can have, but 1000 of the exact same header text seems pretty generous 🙊
$anchor + (range(1000) | if . > 0 then "-\(.)" else "" end)
| select(IN($seen[]) | not)
)
// error("repetition level too deep on #\($anchor) (\($line)) at line \(input_line_number)")
) as $finalAnchor
| .toc += "\("\t" * ($level-1) // "")\(.levels[$level]).\t[\($cap.heading)](#\($finalAnchor))\n"
| .seen += [ $finalAnchor ]
else . end
end
)
| .toc
' "$markdown" > "$markdown.toc"
gawk -v tocFile="$markdown.toc" '
/^<!-- AUTOGENERATED TOC -->$/ {
inToc = !inToc
seenToc = 1
if (inToc) {
print
print ""
system("cat " tocFile)
# no need for another newline because tocFile should already end with one
print
}
next
}
!inToc { print }
' "$markdown" > "$markdown.bak"
mv -f "$markdown.bak" "$markdown"
rm -f "$markdown.toc"