Skip to content

Commit 4d82527

Browse files
Converted bare URLs into clickable links
Ticket: ENT-14112 Signed-off-by: Ihor Aleksandrychiev <ihor.aleksandrychiev@northern.tech> (cherry picked from commit d2b969c)
1 parent ff51699 commit 4d82527

3 files changed

Lines changed: 116 additions & 0 deletions

File tree

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""Wrap bare IPv4 URLs in angle brackets so Markdown renders them as links.
2+
3+
Markdown auto-links hostname URLs but not IP-based ones like
4+
``http://192.0.2.1:8080/path``, so this rewrites them to ``<...>`` autolinks.
5+
6+
- Matches ``http(s)://`` URLs with a dotted IPv4 host, optional port and path,
7+
anywhere on a line (not just on their own line).
8+
- Skips fenced code blocks, inline `code` spans, and URLs already inside an
9+
autolink, a `[text](link)`, or an `href="..."` attribute.
10+
- Trailing sentence punctuation is left outside the link.
11+
12+
``run(config)`` is the entry point; it transforms each file in
13+
``config["markdown_files"]`` and writes it back only if it changed.
14+
"""
15+
16+
import re
17+
18+
# Matches a bare http(s) URL whose host is a dotted IPv4 address.  Group 1 is
# the URL (scheme, host, optional port and path); it may still end in sentence
# punctuation picked up by the path part.
IPV4_URL_RE = re.compile(
    r"""
    (?<![<(`/\w"'])             # skip if already inside <autolink>, [text](link),
                                # `code`, an href="..."/'...' attribute, or a longer
                                # token (word char / slash) that we'd be splitting
    (                           # capture group 1: the URL itself
        https?://               # scheme
        (?:\d{1,3}\.){3}\d{1,3} # IPv4
        (?![\w-]|\.[\w-])       # the host must end here: reject a longer last
                                # octet, a fifth dotted part, or a hostname that
                                # merely starts with an IP (10.0.0.1.nip.io),
                                # which would otherwise be cut in half
        (?::\d+)?               # optional :port
        (?:/[^\s<>)\]`'"]*)?    # optional /path — stop at whitespace or chars
                                # that typically close/quote the URL
    )
    """,
    re.VERBOSE,
)
33+
34+
# Recognises a line that starts a fenced code block.  Group 1 is the leading
# whitespace; group 2 is the full run of fence characters.
FENCE_RE = re.compile(
    r"""
    ^(\s*)          # leading indentation (captured but unused)
    (```+|~~~+)     # fence marker: 3+ backticks or 3+ tildes
    """,
    flags=re.X,
)
41+
42+
43+
def run(config):
    """Process every Markdown file listed in ``config["markdown_files"]``.

    Each entry is passed to ``process`` in the order it appears.
    """
    for markdown_path in config["markdown_files"]:
        process(markdown_path)
46+
47+
48+
def process(file_path):
    """Autolink bare IPv4 URLs in one Markdown file, in place.

    The file is read as UTF-8, passed through ``transform``, and written
    back only when the result differs from what was read.

    Any exception is reported on stdout together with the file path and
    then re-raised unchanged.
    """
    try:
        # newline="" disables newline translation on both read and write, so
        # a rewritten file keeps the line endings (LF or CRLF) it already had
        # instead of being silently normalised to the platform default.
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            content = f.read()

        transformed = transform(content)

        if transformed != content:
            with open(file_path, "w", encoding="utf-8", newline="") as f:
                f.write(transformed)
    except Exception as e:
        print(f"cfdoc_ip_autolink: error processing {file_path}: {e}")
        raise
61+
62+
63+
def transform(content):
    """Return *content* with bare IPv4 URLs autolinked outside fenced code.

    The text is handled line by line with line endings preserved.  Lines
    that open, close, or sit inside a fenced code block are copied verbatim;
    every other line goes through ``transform_line``.

    Fence detection follows the CommonMark rules that matter here:

    - a backtick fence cannot have another backtick later on its opening
      line (such a line is inline code, not the start of a block);
    - a closing fence uses the same character as the opener, is at least as
      long, and is followed by nothing but whitespace.

    A fence that is never closed runs to the end of *content*.
    """
    out_lines = []
    open_fence = None  # fence run that opened the current block, else None

    for line in content.splitlines(keepends=True):
        if open_fence is not None:
            out_lines.append(line)
            candidate = line.strip()
            # Only a bare run of the opening fence character, no shorter
            # than the opener, ends the block; anything else is content.
            if (
                len(candidate) >= len(open_fence)
                and candidate == open_fence[0] * len(candidate)
            ):
                open_fence = None
            continue

        m = FENCE_RE.match(line)
        # Group 2 is the fence run.  A backtick run followed by more
        # backticks on the same line is an inline code span, so the line is
        # treated as ordinary prose rather than swallowing what follows.
        if m and not (m.group(2)[0] == "`" and "`" in line[m.end():]):
            open_fence = m.group(2)
            out_lines.append(line)
            continue

        out_lines.append(transform_line(line))

    return "".join(out_lines)
86+
87+
88+
# Sentence punctuation that is peeled off the end of a matched URL and
# emitted after the closing ``>`` so it stays part of the prose.
TRAILING_PUNCT = ".,;:!?"


def _wrap(match):
    """Build the ``<url>`` replacement text for one regex match.

    Group 1 of *match* is the URL.  Any run of ``TRAILING_PUNCT`` characters
    at its end is moved outside the angle brackets.
    """
    matched = match.group(1)
    url = matched.rstrip(TRAILING_PUNCT)
    return f"<{url}>{matched[len(url):]}"
99+
100+
101+
def transform_line(line):
    """Autolink bare IPv4 URLs in one line, leaving inline code spans alone."""
    # The capturing group makes re.split keep the backtick spans: they land
    # at the odd indices, with the surrounding prose at the even ones.
    pieces = re.split(r"(`+[^`\n]*`+)", line)
    pieces[::2] = [IPV4_URL_RE.sub(_wrap, prose) for prose in pieces[::2]]
    return "".join(pieces)

generator/_scripts/cfdoc_preprocess.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import cfdoc_shortcodes_resolver as shortcodes_resolver
3434
import cfdoc_images_path_resolver as images_path_resolver
3535
import cfdoc_codeblock_resolver as codeblock_resolver
36+
import cfdoc_ip_autolink as ip_autolink
3637
import sys
3738
import os
3839

@@ -50,3 +51,4 @@
5051
shortcodes_resolver.run(config)
5152
images_path_resolver.run(config)
5253
codeblock_resolver.run(config)
54+
ip_autolink.run(config)

hugo/config.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ wrapperClass = 'hlc'
4242
[markup.goldmark.renderer]
4343
unsafe = true # Allow HTML in md files
4444

45+
# Automatically convert bare URLs into clickable links
46+
# linkify skips IP-based URLs, which are handled by the
47+
# generator/_scripts/cfdoc_ip_autolink.py pre-process script
48+
[markup.goldmark.extensions]
49+
linkify = true
50+
4551
[params.sitemap]
4652
baseUrl = "https://docs.cfengine.com/docs/%branch%"
4753

0 commit comments

Comments
 (0)