33CONTENT_DIR=" content"
44EXIT_CODE=0
55
6- SCRIPT_DIR=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd) "
7- REPO_ROOT=" $( cd " $SCRIPT_DIR /.." && pwd) "
8- CONTENT_ROOT=" $( cd " $REPO_ROOT /$CONTENT_DIR " && pwd) "
6+ VERBOSE=" ${VERBOSE:- 0} "
7+
#######################################
# Print an informational message when verbose mode is enabled.
# Globals:   VERBOSE (read) - the message is printed only when it equals "1"
# Arguments: $* - message words, joined with spaces
# Outputs:   writes "Info: <message>" to stdout when verbose
# Returns:   0 always; a suppressed message is not an error
#######################################
log_verbose() {
  # An explicit `if` (instead of `[[ ... ]] && echo`) keeps the return
  # status at 0 when verbose mode is off, so callers running under
  # `set -e` or chaining with `&&` / `||` are not tripped by a quiet log.
  if [[ "${VERBOSE:-0}" == "1" ]]; then
    printf 'Info: %s\n' "$*"
  fi
}
11+
12+
# Alternation of file extensions (without the leading dot) that identify
# static assets. It is interpolated into an extended regular expression of
# the form \.(<alternation>)$, so the dot in "tar.gz" must be escaped to
# match a literal dot rather than any character.
ASSET_EXTENSIONS_REGEX='png|jpg|jpeg|svg|gif|webp|avif|ico|xml|yaml|yml|json|css|js|pdf|zip|tar\.gz|woff|woff2|ttf|eot|mp4|webm'
14+ SCRIPT_DIR=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd) " || exit 1
15+ REPO_ROOT=" $( cd " $SCRIPT_DIR /.." && pwd) " || exit 1
16+ CONTENT_ROOT=" $( cd " $REPO_ROOT /$CONTENT_DIR " && pwd) " || exit 1
917
1018if [[ ! -d " $CONTENT_ROOT " ]]; then
1119 echo " Error: content directory not found. Run from repository root."
1523normalize_link () {
1624 local link=" $1 "
1725
26+ # Decode common URL-encoded characters explicitly
27+ link=" ${link//% 20/ } " # space
28+ link=" ${link//% 23/# } " # hash
29+ link=" ${link//% 2F/ \/ } " # forward slash
30+
31+ # Generic percent-decoding for remaining cases
1832 link=" ${link//%/ \\ x} "
1933 link=" $( printf ' %b' " $link " ) "
2034
@@ -58,16 +72,17 @@ resolve_real_path() {
5872 local path=" $1 "
5973
6074 if command -v python3 > /dev/null 2>&1 ; then
61- # Use python to compute realpath which is tolerant of non existing final target
75+ # Use Python to compute realpath which resolves symlinks AND normalizes paths
76+ # Python's os.path.realpath is tolerant of non-existent final targets
6277 python3 - << 'PY ' "$path"
6378import os
6479import sys
6580p = sys.argv[1]
66- # os.path.realpath resolves symlinks for existing components and otherwise returns a normalized path
6781print(os.path.realpath(p))
6882PY
6983 else
70- # Fallback to the safe canonicalize_path output if python3 is not available
84+ # Fallback: Normalize without symlink resolution if Python3 unavailable
85+ # Note: This won't resolve symlinks, only normalize .. and . components
7186 canonicalize_path " $path "
7287 fi
7388}
@@ -85,12 +100,14 @@ check_internal_link() {
85100 [[ -z " $clean_link " || " $clean_link " == " #" ]] && return 0
86101
87102 if [[ " $clean_link " == " {{" * ]]; then
103+ log_verbose " Skipping Hugo shortcode link: $link ($file :$line_no )"
88104 return 0
89105 fi
90106
91107 local clean_lower=" ${clean_link,,} "
92108
93109 if [[ " $clean_lower " == http://* || " $clean_lower " == https://* || " $clean_lower " == " //" * ]]; then
110+ log_verbose " Skipping external link: $link ($file :$line_no )"
94111 return 0
95112 fi
96113
@@ -105,7 +122,14 @@ check_internal_link() {
105122 elif [[ " $clean_link " == /cn/docs/* ]]; then
106123 target_path=" $CONTENT_ROOT ${clean_link} "
107124 elif [[ " $clean_link " == /* ]]; then
108- target_path=" $CONTENT_ROOT /en${clean_link} "
125+ # Skip validation for ambiguous absolute paths (Hugo runtime URLs)
126+ location=" $file "
127+ [[ -n " $line_no " ]] && location=" $file :$line_no "
128+ echo " Warning: Skipping validation for ambiguous absolute path"
129+ echo " File: $location "
130+ echo " Link: $link "
131+ echo " Reason: Hugo runtime URL (not directly mappable to filesystem)"
132+ return 0
109133 else
110134 local file_dir
111135 file_dir=" $( cd " $( dirname " $file " ) " && pwd) "
@@ -128,22 +152,20 @@ check_internal_link() {
128152 ;;
129153 esac
130154
131- case " $clean_lower " in
132- * .png|* .jpg|* .jpeg|* .svg|* .gif|* .xml|* .yaml|* .yml|* .json|* .css|* .js|* .pdf|* .zip|* .tar.gz)
133- if [[ -f " $target_path " ]]; then
134- return 0
135- else
136- location=" $file "
137- [[ -n " $line_no " ]] && location=" $file :$line_no "
138- echo " Error: Broken link"
139- echo " File: $location "
140- echo " Link: $link "
141- echo " Target: $target_path "
142- EXIT_CODE=1
143- return
144- fi
145- ;;
146- esac
155+ if [[ " $clean_lower " =~ \. (${ASSET_EXTENSIONS_REGEX} )$ ]]; then
156+ if [[ -f " $target_path " ]]; then
157+ return 0
158+ else
159+ location=" $file "
160+ [[ -n " $line_no " ]] && location=" $file :$line_no "
161+ echo " Error: Broken link"
162+ echo " File: $location "
163+ echo " Link: $link "
164+ echo " Target: $target_path "
165+ EXIT_CODE=1
166+ return
167+ fi
168+ fi
147169
148170 if [[ -f " $target_path " || -f " $target_path .md" || -f " $target_path /_index.md" || -f " $target_path /README.md" ]]; then
149171 return 0
@@ -170,6 +192,12 @@ while read -r FILE; do
170192 (( line_no++ ))
171193
172194 if [[ " $line " =~ ^[[:space:]]* (\`\`\` | ~~~) ]]; then
195+ # NOTE:
196+ # Code fence detection assumes fences are properly paired.
197+ # If a Markdown file contains an unclosed or mismatched fence,
198+ # subsequent content may be treated as code and skipped.
199+ # This script does not attempt full Markdown validation.
200+
173201 if $in_fence ; then
174202 in_fence=false
175203 else
@@ -184,10 +212,18 @@ while read -r FILE; do
184212 continue
185213 fi
186214
187- inline_count=$( grep -o " \` " <<< " $line" | wc -l)
215+ # NOTE:
216+ # Inline code detection is heuristic.
217+ # It assumes backticks are paired on the same line.
218+ # Escaped backticks are ignored, but complex or malformed
219+ # Markdown inline code spans may still be misdetected.
220+
221+ escaped_line=" ${line// \\\` / } "
222+ inline_count=$( grep -o " \` " <<< " $escaped_line" | wc -l)
188223 if (( inline_count % 2 == 1 )) ; then
189224 CODE_LINES[$line_no ]=1
190225 fi
226+
191227 done < " $FILE "
192228
193229 while read -r MATCH; do
0 commit comments