Skip to content

Commit a18fb4e

Browse files
Converted bare URLs into clickable links
Ticket: ENT-14112 Signed-off-by: Ihor Aleksandrychiev <ihor.aleksandrychiev@northern.tech>
1 parent 5e6bddc commit a18fb4e

3 files changed

Lines changed: 100 additions & 0 deletions

File tree

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import re
2+
3+
# Matches a bare http(s) URL whose host is a dotted-quad IPv4 address.
# Group 1 is the whole URL: scheme, address, optional port and optional
# path / query / fragment.  Octets are not range-checked.
#
# The negative lookahead after the address keeps the pattern from cutting a
# longer host in half: "http://10.0.0.1.nip.io" or "http://1.2.3.4567" are
# not IPv4 URLs and must be left exactly as written.
IPV4_URL_RE = re.compile(
    r"""
    (?<![<(`/\w"'])             # skip if already inside <autolink>, [text](link),
                                # `code`, an href="..."/'...' attribute, or a longer
                                # token (word char / slash) that we'd be splitting
    (                           # capture group 1: the URL itself
      https?://                 # scheme
      (?:\d{1,3}\.){3}\d{1,3}   # IPv4
      (?!\.?[A-Za-z0-9_-])      # the host must end here: no extra digit, label
                                # or hyphenated suffix (a lone sentence-ending
                                # dot is still fine)
      (?::\d+)?                 # optional :port
      (?:[/?\#][^\s<>)\]`'"]*)? # optional /path, ?query or fragment; stop at
                                # whitespace or chars that typically close/quote
                                # the URL
    )
    """,
    re.VERBOSE,
)
18+
19+
# Recognises a Markdown code-fence marker at the start of a line, after any
# amount of leading whitespace.  Group 1 is that whitespace; group 2 is the
# fence run itself (three or more backticks, or three or more tildes).
FENCE_RE = re.compile(
    r"""
    ^(\s*)          # leading indentation (captured but unused)
    (```+|~~~+)     # fence marker: 3+ backticks or 3+ tildes
    """,
    re.VERBOSE,
)
26+
27+
28+
def run(config):
    """Autolink bare IPv4 URLs in every configured Markdown file.

    Each entry of ``config["markdown_files"]`` is handed to ``process`` in
    iteration order.
    """
    markdown_paths = config["markdown_files"]
    for markdown_path in markdown_paths:
        process(markdown_path)
31+
32+
33+
def process(file_path):
    """Autolink bare IPv4 URLs in one Markdown file, rewriting it in place.

    The file is read as UTF-8, passed through ``transform`` and written back
    only when the text actually changed.

    Both opens use ``newline=""`` so line endings pass through untranslated:
    without it, a rewritten file would have its CRLF endings replaced by the
    platform default, producing a diff on every line.

    Raises:
        Any exception from reading, transforming or writing is re-raised
        after a message naming the file has been printed.
    """
    try:
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            content = f.read()

        transformed = transform(content)

        if transformed != content:
            with open(file_path, "w", encoding="utf-8", newline="") as f:
                f.write(transformed)
    except Exception as e:
        print(f"cfdoc_ip_autolink: error processing {file_path}: {e}")
        raise
46+
47+
48+
def transform(content):
    """Return ``content`` with bare IPv4 URLs autolinked outside code fences.

    The text is walked line by line (line endings are kept).  Lines that
    belong to a fenced code block, including the fence lines themselves, are
    copied through unchanged; every other line goes through
    ``transform_line``.

    Fence recognition follows the CommonMark rules that matter here:

    * a closing fence is a run of the *same* character, at least as long as
      the opening run, with nothing but whitespace around it, so a line such
      as "```python" inside a block does not end the block;
    * a backtick fence cannot be followed by another backtick on the same
      line, so a one-line inline span such as "```code```" does not open a
      block.
    """
    out_lines = []
    fence_marker = None  # opening fence run while inside a block, else None

    for line in content.splitlines(keepends=True):
        if fence_marker is not None:
            out_lines.append(line)
            candidate = line.strip()
            # Long enough, and made of the fence character only.
            if candidate.startswith(fence_marker) and not candidate.strip(
                fence_marker[0]
            ):
                fence_marker = None
            continue

        m = FENCE_RE.match(line)
        if m:
            marker = m.group(2)
            is_inline_span = marker[0] == "`" and "`" in line[m.end():]
            if not is_inline_span:
                fence_marker = marker
                out_lines.append(line)
                continue

        out_lines.append(transform_line(line))

    return "".join(out_lines)
71+
72+
73+
# Stripped from the end of a URL so sentence punctuation stays in the prose.
TRAILING_PUNCT = ".,;:!?"


def _wrap(match):
    """Render a URL match as a Markdown autolink.

    Group 1 of ``match`` is the URL.  Any run of ``TRAILING_PUNCT``
    characters at its end is moved outside the closing angle bracket, so
    "http://1.2.3.4/x." becomes "<http://1.2.3.4/x>.".
    """
    matched = match.group(1)
    link = matched.rstrip(TRAILING_PUNCT)
    punctuation = matched[len(link):]
    return f"<{link}>{punctuation}"
84+
85+
86+
def transform_line(line):
    """Autolink bare IPv4 URLs in one line, leaving inline code spans alone.

    The line is split on backtick-delimited spans.  Because the split
    pattern has a capturing group, the spans land at the odd indices of the
    result and are copied through verbatim; the even-indexed pieces have
    their URLs wrapped by ``_wrap``.
    """
    segments = re.split(r"(`+[^`\n]*`+)", line)
    return "".join(
        segment if index % 2 == 1 else IPV4_URL_RE.sub(_wrap, segment)
        for index, segment in enumerate(segments)
    )

generator/_scripts/cfdoc_preprocess.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import cfdoc_shortcodes_resolver as shortcodes_resolver
3434
import cfdoc_images_path_resolver as images_path_resolver
3535
import cfdoc_codeblock_resolver as codeblock_resolver
36+
import cfdoc_ip_autolink as ip_autolink
3637
import sys
3738
import os
3839

@@ -50,3 +51,4 @@
5051
shortcodes_resolver.run(config)
5152
images_path_resolver.run(config)
5253
codeblock_resolver.run(config)
54+
ip_autolink.run(config)

hugo/config.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ wrapperClass = 'hlc'
4242
[markup.goldmark.renderer]
4343
unsafe = true # Allow HTML in md files
4444

45+
# Automatically convert bare URLs into clickable links
46+
# linkify skips IP addresses, which are handled by pre-process scripts
47+
[markup.goldmark.extensions]
48+
linkify = true
49+
4550
[params.sitemap]
4651
baseUrl = "https://docs.cfengine.com/docs/%branch%"
4752

0 commit comments

Comments
 (0)