Converted bare URLs into clickable links

aleksandrychev · aleksandrychev · commit 9b45863f7950 · 2026-06-09T14:45:19.000+03:00
Ticket: ENT-14112
Signed-off-by: Ihor Aleksandrychiev <ihor.aleksandrychiev@northern.tech>
diff --git a/generator/_scripts/cfdoc_ip_autolink.py b/generator/_scripts/cfdoc_ip_autolink.py
@@ -0,0 +1,94 @@
+import re
+
+
+# Matches a bare http(s) URL whose host is a dotted-quad IPv4 address.
+# Capture group 1 is the whole URL (scheme, address, optional port and path).
+# URLs that are already part of Markdown/HTML link syntax, or whose host only
+# *starts* with four numeric labels (e.g. 10.0.0.1.nip.io), are not matched.
+IPV4_URL_RE = re.compile(
+    r"""
+    (?<![<(\[`/\w"'])          # skip if already inside <autolink>, the text or
+                               # target of [text](link), `code`, an
+                               # href="..."/'...' attribute, or a longer
+                               # token (word char / slash) that we'd be splitting
+    (                          # capture group 1: the URL itself
+      https?://                # scheme
+      (?:\d{1,3}\.){3}\d{1,3}  # IPv4
+      (?!\d|\.\w)              # the host must end here: no extra digit and no
+                               # further dotted label, so we never cut a longer
+                               # host name in two (a sentence-ending "." is fine)
+      (?::\d+)?                # optional :port
+      (?:/[^\s<>)\]`'"]*)?     # optional /path — stop at whitespace or chars
+                               # that typically close/quote the URL
+    )
+    """,
+    re.VERBOSE,
+)
+
+# Matches the opening line of a fenced code block. Group 2 is the fence marker
+# (the exact run of backticks or tildes); transform() keeps it and treats the
+# next line that starts with that marker as the end of the block.
+FENCE_RE = re.compile(
+    r"""
+    ^(\s*)              # leading indentation (captured but unused)
+    (```+|~~~+)         # fence marker: 3+ backticks or 3+ tildes
+    """,
+    re.VERBOSE,
+)
+
+
+def run(config):
+    """Run the IPv4 autolink pass over every path in ``config["markdown_files"]``."""
+    for markdown_path in config["markdown_files"]:
+        process(markdown_path)
+
+
+def process(file_path):
+    """Apply ``transform`` to one file, rewriting it only if the text changed.
+
+    The file is read and written as UTF-8. Any exception raised while reading,
+    transforming or writing is reported on stdout together with the file path
+    and then re-raised to the caller.
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as source:
+            original = source.read()
+
+        updated = transform(original)
+        if updated == original:
+            # Nothing was linked; leave the file on disk untouched.
+            return
+
+        with open(file_path, "w", encoding="utf-8") as target:
+            target.write(updated)
+    except Exception as err:
+        print(f"cfdoc_ip_autolink: error processing {file_path}: {err}")
+        raise
+
+
+def transform(content):
+    """Return ``content`` with bare IPv4 URLs autolinked outside fenced code.
+
+    The text is walked line by line (line endings are kept). Lines belonging
+    to a fenced code block, including the opening and closing fence lines,
+    are copied verbatim; every other line goes through ``transform_line``.
+    """
+    # Marker of the fence we are currently inside, or None when outside one.
+    open_fence = None
+    result = []
+
+    for raw in content.splitlines(keepends=True):
+        if open_fence is not None:
+            result.append(raw)
+            # A line starting with the opening marker ends the block.
+            if raw.lstrip().startswith(open_fence):
+                open_fence = None
+            continue
+
+        opening = FENCE_RE.match(raw)
+        if opening is None:
+            result.append(transform_line(raw))
+        else:
+            open_fence = opening.group(2)
+            result.append(raw)
+
+    return "".join(result)
+
+
+# Sentence punctuation that is moved out of a matched URL so it stays in the
+# surrounding prose instead of becoming part of the link.
+TRAILING_PUNCT = ".,;:!?"
+
+
+def _wrap(match):
+    """Return capture group 1 of ``match`` as a ``<url>`` autolink.
+
+    Any run of ``TRAILING_PUNCT`` characters at the end of the URL is placed
+    after the closing ``>`` rather than inside it.
+    """
+    matched = match.group(1)
+    link = matched.rstrip(TRAILING_PUNCT)
+    tail = matched[len(link):]
+    return f"<{link}>{tail}"
+
+
+# An inline code span: a run of backticks, some content, and a closing run of
+# exactly the same length. Shorter or longer backtick runs inside the span are
+# part of its content and do not end it.
+_CODE_SPAN_RE = re.compile(r"(?<!`)(`+)(?!`).+?(?<!`)\1(?!`)")
+
+
+def transform_line(line):
+    """Autolink bare IPv4 URLs in one line, leaving inline code spans alone.
+
+    The line is cut at inline code spans; the spans themselves are copied
+    verbatim and only the text between them is passed through
+    ``IPV4_URL_RE`` / ``_wrap``. Matching the closing backtick run to the
+    opening one keeps a URL inside a span such as
+    ``` ``a ` http://1.2.3.4 ` b`` ``` from being treated as prose.
+    """
+    pieces = []
+    cursor = 0
+    for span in _CODE_SPAN_RE.finditer(line):
+        # Prose before this code span: eligible for autolinking.
+        pieces.append(IPV4_URL_RE.sub(_wrap, line[cursor:span.start()]))
+        # The code span itself: untouched.
+        pieces.append(span.group(0))
+        cursor = span.end()
+    # Whatever follows the last code span (or the whole line if none).
+    pieces.append(IPV4_URL_RE.sub(_wrap, line[cursor:]))
+    return "".join(pieces)
diff --git a/generator/_scripts/cfdoc_preprocess.py b/generator/_scripts/cfdoc_preprocess.py
@@ -33,6 +33,7 @@
 import cfdoc_shortcodes_resolver as shortcodes_resolver
 import cfdoc_images_path_resolver as images_path_resolver
 import cfdoc_codeblock_resolver as codeblock_resolver
+import cfdoc_ip_autolink as ip_autolink
 import sys
 import os
 
@@ -50,3 +51,4 @@
 shortcodes_resolver.run(config)
 images_path_resolver.run(config)
 codeblock_resolver.run(config)
+ip_autolink.run(config)
diff --git a/hugo/config.toml b/hugo/config.toml
@@ -42,6 +42,11 @@ wrapperClass = 'hlc'
 [markup.goldmark.renderer]
   unsafe = true # Allow HTML in md files
 
+# Automatically convert bare URLs into clickable links.
+# linkify skips IP-address URLs; the cfdoc_ip_autolink pre-process script handles those.
+[markup.goldmark.extensions]
+  linkify = true
+
 [params.sitemap]
 baseUrl = "https://docs.cfengine.com/docs/%branch%"