# Name of the content directory, relative to the repository root.
CONTENT_DIR="content"
# Overall result of the run; set to 1 as soon as any link problem is reported.
EXIT_CODE=0

# Locate the repository relative to this script rather than the caller's
# working directory, so the script can be started from anywhere.
SCRIPT_DIR="$(cd "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || exit 1
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" || exit 1

# Resolve the content root to its physical path (pwd -P): link targets are
# symlink-resolved before the "inside content directory" check, so the root
# they are compared against must be symlink-free as well.
# The cd error is silenced on purpose; the message below replaces it.
if ! CONTENT_ROOT="$(cd "$REPO_ROOT/$CONTENT_DIR" 2> /dev/null && pwd -P)" \
  || [[ ! -d "$CONTENT_ROOT" ]]; then
  echo "Error: content directory not found: $REPO_ROOT/$CONTENT_DIR" >&2
  exit 1
fi
14+
615normalize_link () {
716 local link=" $1 "
817
18+ link=" ${link//%/ \\ x} "
19+ link=" $( printf ' %b' " $link " ) "
20+
921 link=" ${link%%#* } "
1022 link=" ${link%% \? * } "
1123
@@ -16,99 +28,168 @@ normalize_link() {
1628 printf " %s" " $link "
1729}
1830
#######################################
# Lexically normalize a slash-separated path without touching the filesystem.
# Empty components and "." are dropped; ".." removes the previous component
# and is ignored once the root has been reached.
# Only the first line of the argument is considered (it is read with `read`).
# Arguments: $1 - path to normalize
# Outputs:   the normalized path on stdout, always starting with "/" and
#            without a trailing newline; "/" when no component remains
# Returns:   0
#######################################
canonicalize_path() {
  local path="$1"
  # Both arrays are local so repeated calls never leak state into the caller.
  local -a parts=() result=()
  local part

  IFS='/' read -r -a parts <<< "$path"

  for part in "${parts[@]}"; do
    case "$part" in
      '' | '.')
        # "//" and "/./" contribute nothing to the path.
        ;;
      '..')
        if (( ${#result[@]} > 0 )); then
          # Explicit last index instead of result[-1], which needs bash >= 4.3.
          unset "result[${#result[@]}-1]"
        fi
        ;;
      *)
        result+=("$part")
        ;;
    esac
  done

  if (( ${#result[@]} == 0 )); then
    printf '/'
    return 0
  fi

  # Function-local IFS makes "${result[*]}" join on "/" without a subshell.
  local IFS='/'
  printf '/%s' "${result[*]}"
}
56+
#######################################
# Resolve a path to its real location, following symlinks where possible.
# Uses python3's os.path.realpath when python3 is on PATH; it resolves
# symlinks for the components that exist and normalizes the remainder, so the
# final target does not have to exist. If python3 is missing, fails, or prints
# nothing, the purely lexical canonicalize_path result is used instead.
# Arguments: $1 - path to resolve
# Outputs:   the resolved path on stdout
# Returns:   0 on the python3 path, otherwise canonicalize_path's status
#######################################
resolve_real_path() {
  local path="$1"
  local resolved

  if command -v python3 > /dev/null 2>&1; then
    # The result is written as raw bytes via os.fsencode: print() can raise
    # UnicodeEncodeError for file names that are not valid in the locale's
    # encoding.
    if resolved="$(python3 -c '
import os
import sys
sys.stdout.buffer.write(os.fsencode(os.path.realpath(sys.argv[1])))
' "$path")" && [[ -n "$resolved" ]]; then
      printf '%s\n' "$resolved"
      return 0
    fi
  fi

  # Fallback: lexical normalization only (symlinks are not resolved).
  canonicalize_path "$path"
}
74+
#######################################
# Print a link error report and mark the whole run as failed.
# Globals:   EXIT_CODE (written, set to 1)
# Arguments: $1 - headline printed after "Error: "
#            $2 - file containing the link
#            $3 - line number of the link (may be empty)
#            $4 - link exactly as written in the file
#            $5 - resolved target path (optional; omitted from output if empty)
# Outputs:   the report on stdout
#######################################
_report_link_error() {
  local message="$1"
  local file="$2"
  local line_no="$3"
  local link="$4"
  local target="${5:-}"
  local location="$file"

  if [[ -n "$line_no" ]]; then
    location="$file:$line_no"
  fi

  echo "Error: $message"
  echo "  File: $location"
  echo "  Link: $link"
  if [[ -n "$target" ]]; then
    echo "  Target: $target"
  fi
  EXIT_CODE=1
}

#######################################
# Validate one link found in a content file.
# External URLs (http, https, protocol-relative), mailto:/tel:/javascript:/
# data: links, links starting with "{{" and empty or pure-anchor links are
# accepted without a check. Every other link is mapped to a path, normalized,
# symlink-resolved and required to stay inside the content directory and to
# exist: asset extensions must match a file exactly, other links may also
# match "<target>.md", "<target>/_index.md" or "<target>/README.md".
# Globals:   CONTENT_ROOT (read), EXIT_CODE (set to 1 on any error),
#            _LINKCHECK_ROOT_SRC / _LINKCHECK_ROOT_REAL (cache, written)
# Arguments: $1 - link as written, $2 - containing file, $3 - line number
# Outputs:   an error report on stdout for broken or escaping links
# Returns:   0 always; failures are signalled through EXIT_CODE
#######################################
check_internal_link() {
  local link="$1"
  local file="$2"
  local line_no="$3"
  local clean_link clean_lower target_path file_dir

  clean_link="$(normalize_link "$link")"

  if [[ -z "$clean_link" || "$clean_link" == "#" ]]; then
    return 0
  fi

  # Links starting with "{{" are template expressions, not paths.
  if [[ "$clean_link" == "{{"* ]]; then
    return 0
  fi

  # Scheme matching is case-insensitive, so compare a lower-cased copy.
  clean_lower="${clean_link,,}"
  case "$clean_lower" in
    http://* | https://* | //* | mailto:* | tel:* | javascript:* | data:*)
      return 0
      ;;
  esac

  case "$clean_link" in
    /cn/docs/*)
      target_path="$CONTENT_ROOT$clean_link"
      ;;
    /*)
      # Every other site-absolute link (including /docs/...) lives under en/.
      target_path="$CONTENT_ROOT/en$clean_link"
      ;;
    *)
      # Relative link: resolve against the directory of the containing file.
      # If that directory cannot be entered, fall back to its literal name.
      file_dir="$(cd "$(dirname -- "$file")" 2> /dev/null && pwd)" \
        || file_dir="$(dirname -- "$file")"
      target_path="$file_dir/$clean_link"
      ;;
  esac

  target_path="$(canonicalize_path "$target_path")"
  target_path="$(resolve_real_path "$target_path")"

  # The target has been symlink-resolved, so the root it is compared against
  # must be resolved the same way; otherwise every link is reported as
  # "outside" when the checkout is reached through a symlink. The resolved
  # root is cached per CONTENT_ROOT value to avoid re-resolving it per link.
  if [[ "${_LINKCHECK_ROOT_SRC:-}" != "$CONTENT_ROOT" ]]; then
    _LINKCHECK_ROOT_REAL="$(resolve_real_path "$CONTENT_ROOT")"
    _LINKCHECK_ROOT_SRC="$CONTENT_ROOT"
  fi

  case "$target_path" in
    "$_LINKCHECK_ROOT_REAL" | "$_LINKCHECK_ROOT_REAL"/*)
      # Inside the content directory (the root itself counts as inside).
      ;;
    *)
      _report_link_error "Link resolves outside content directory" \
        "$file" "$line_no" "$link"
      return 0
      ;;
  esac

  case "$clean_lower" in
    *.png | *.jpg | *.jpeg | *.svg | *.gif | *.xml | *.yaml | *.yml | *.json | *.css | *.js | *.pdf | *.zip | *.tar.gz)
      # Assets must exist exactly as referenced; no ".md"/index variants.
      if [[ ! -f "$target_path" ]]; then
        _report_link_error "Broken link" "$file" "$line_no" "$link" "$target_path"
      fi
      return 0
      ;;
  esac

  if [[ -f "$target_path" || -f "$target_path.md" \
    || -f "$target_path/_index.md" || -f "$target_path/README.md" ]]; then
    return 0
  fi

  _report_link_error "Broken link" "$file" "$line_no" "$link" "$target_path"
  return 0
}
88161
89162echo " Starting link validation..."
90163
91164while read -r FILE; do
92165 declare -A CODE_LINES
93- in_code =false
166+ in_fence =false
94167 line_no=0
95168
96- # Pass 1: mark fenced code block lines
97169 while IFS= read -r line; do
98170 (( line_no++ ))
171+
99172 if [[ " $line " =~ ^[[:space:]]* (\`\`\` | ~~~) ]]; then
100- if $in_code ; then
101- in_code =false
173+ if $in_fence ; then
174+ in_fence =false
102175 else
103- in_code =true
176+ in_fence =true
104177 fi
105178 CODE_LINES[$line_no ]=1
106- elif $in_code ; then
179+ continue
180+ fi
181+
182+ if $in_fence ; then
183+ CODE_LINES[$line_no ]=1
184+ continue
185+ fi
186+
187+ inline_count=$( grep -o " \` " <<< " $line" | wc -l)
188+ if (( inline_count % 2 == 1 )) ; then
107189 CODE_LINES[$line_no ]=1
108190 fi
109191 done < " $FILE "
110192
111- # Pass 2: extract links with original line numbers
112193 while read -r MATCH; do
113194 [[ -z " $MATCH " ]] && continue
114195
@@ -124,7 +205,7 @@ while read -r FILE; do
124205 done < <( grep -n -oE ' \]\([^)]+\)' " $FILE " )
125206
126207 unset CODE_LINES
127- done < <( find " $CONTENT_DIR " -type f -name " *.md" )
208+ done < <( find " $CONTENT_ROOT " -type f -name " *.md" )
128209
129210if [[ $EXIT_CODE -eq 0 ]]; then
130211 echo " Link validation passed!"
0 commit comments