Skip to content

Commit 9b45863

Browse files
Converted bare URLs into clickable links
Ticket: ENT-14112 Signed-off-by: Ihor Aleksandrychiev <ihor.aleksandrychiev@northern.tech>
1 parent 5e6bddc commit 9b45863

3 files changed

Lines changed: 101 additions & 0 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import re
2+
3+
4+
# Matches a bare http(s) URL whose host is a dotted-quad IPv4 address.
# Group 1 is the whole URL (scheme, host, optional port, optional path).
# The octets are not range-checked (999.999.999.999 still matches).
IPV4_URL_RE = re.compile(
    r"""
    (?<![<(`/\w"'])             # skip if already inside <autolink>, [text](link),
                                # `code`, an href="..."/'...' attribute, or a longer
                                # token (word char / slash) that we'd be splitting
    (                           # capture group 1: the URL itself
        https?://               # scheme
        (?:\d{1,3}\.){3}\d{1,3} # IPv4
        (?!\.?[\w-])            # the host must end here: reject a 4th digit, a
                                # 5th dotted part, or a hostname that merely
                                # starts with an IP (e.g. 10.0.0.1.nip.io), so
                                # such tokens are never wrapped half-way
        (?::\d+)?               # optional :port
        (?:/[^\s<>)\]`'"]*)?    # optional /path — stop at whitespace or chars
                                # that typically close/quote the URL
    )
    """,
    re.VERBOSE,
)


# Matches the start of a fenced-code-block delimiter line.
# Group 1 is the indentation, group 2 is the fence marker itself.
FENCE_RE = re.compile(
    r"""
    ^(\s*)          # leading indentation (captured but unused)
    (```+|~~~+)     # fence marker: 3+ backticks or 3+ tildes
    """,
    re.VERBOSE,
)
27+
28+
29+
def run(config):
    """Apply the IPv4 autolink pass to every configured markdown file.

    Args:
        config: mapping whose "markdown_files" entry is an iterable of
            file paths; each one is handed to process() in order.
    """
    markdown_paths = config["markdown_files"]
    for markdown_path in markdown_paths:
        process(markdown_path)
32+
33+
34+
def process(file_path):
    """Autolink bare IPv4 URLs in one markdown file, rewriting it in place.

    The file is read as UTF-8, passed through transform(), and written back
    only when the transformation actually changed the text.

    Args:
        file_path: path of the markdown file to process.

    Raises:
        Exception: any error raised while reading, transforming or writing
            is reported on stdout and then re-raised unchanged.
    """
    try:
        # newline="" turns off newline translation in both directions, so a
        # file that uses CRLF (or mixed) line endings keeps them byte-for-byte
        # when it is rewritten instead of being silently normalised.
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            content = f.read()

        transformed = transform(content)

        # Leave untouched files alone so their mtime does not change.
        if transformed != content:
            with open(file_path, "w", encoding="utf-8", newline="") as f:
                f.write(transformed)
    except Exception as e:
        print(f"cfdoc_ip_autolink: error processing {file_path}: {e}")
        raise
47+
48+
49+
def _closes_fence(line, fence_marker):
    """Return True if *line* is a valid closing fence for *fence_marker*.

    Following CommonMark, a closing fence consists only of the fence
    character (surrounding whitespace ignored) and is at least as long as
    the opening marker. A line such as "```python" therefore does not
    close a "```" block.
    """
    candidate = line.strip()
    return (
        len(candidate) >= len(fence_marker)
        and candidate == fence_marker[0] * len(candidate)
    )


def transform(content):
    """Return *content* with bare IPv4 URLs wrapped as <autolinks>.

    The text is processed line by line. Lines inside fenced code blocks
    (including the fence lines themselves) are copied through unchanged;
    every other line is passed to transform_line().

    Args:
        content: full markdown text of one file.

    Returns:
        The transformed text; line endings are preserved as given.
    """
    out_lines = []
    # Marker that opened the fence we are currently inside, or None when
    # we are in ordinary prose.
    fence_marker = None

    for line in content.splitlines(keepends=True):
        if fence_marker is not None:
            out_lines.append(line)
            if _closes_fence(line, fence_marker):
                fence_marker = None
            continue

        m = FENCE_RE.match(line)
        if m:
            marker = m.group(2)
            info = line[m.end():]
            # A backtick fence may not have backticks in its info string;
            # "```code``` text" is an inline code span, not a fence, and
            # must not switch the rest of the file into "in fence" mode.
            if not (marker[0] == "`" and "`" in info):
                fence_marker = marker
                out_lines.append(line)
                continue

        out_lines.append(transform_line(line))

    return "".join(out_lines)
72+
73+
74+
# Stripped from the end of a URL so sentence punctuation stays in the prose.
75+
TRAILING_PUNCT = ".,;:!?"
76+
77+
78+
def _wrap(match):
79+
url = match.group(1)
80+
trailing = ""
81+
while url and url[-1] in TRAILING_PUNCT:
82+
trailing = url[-1] + trailing
83+
url = url[:-1]
84+
return f"<{url}>{trailing}"
85+
86+
87+
def transform_line(line):
    """Autolink bare IPv4 URLs in a single line, leaving `code` spans alone.

    Args:
        line: one line of markdown that is not inside a fenced code block.

    Returns:
        The line with every IPV4_URL_RE match outside inline code spans
        rewritten by _wrap().
    """
    # Splitting on a pattern with one capturing group keeps the separators:
    # even positions are prose, odd positions are the backtick spans.
    segments = re.split(r"(`+[^`\n]*`+)", line)
    return "".join(
        segment if position % 2 == 1 else IPV4_URL_RE.sub(_wrap, segment)
        for position, segment in enumerate(segments)
    )

generator/_scripts/cfdoc_preprocess.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import cfdoc_shortcodes_resolver as shortcodes_resolver
3434
import cfdoc_images_path_resolver as images_path_resolver
3535
import cfdoc_codeblock_resolver as codeblock_resolver
36+
import cfdoc_ip_autolink as ip_autolink
3637
import sys
3738
import os
3839

@@ -50,3 +51,4 @@
5051
shortcodes_resolver.run(config)
5152
images_path_resolver.run(config)
5253
codeblock_resolver.run(config)
54+
ip_autolink.run(config)

hugo/config.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ wrapperClass = 'hlc'
4242
[markup.goldmark.renderer]
4343
unsafe = true # Allow HTML in md files
4444

45+
# Automatically convert bare URLs into clickable links
46+
# linkify skips IP addresses; those are handled by the cfdoc_ip_autolink preprocess script
47+
[markup.goldmark.extensions]
48+
linkify = true
49+
4550
[params.sitemap]
4651
baseUrl = "https://docs.cfengine.com/docs/%branch%"
4752

0 commit comments

Comments
 (0)