11#! /bin/bash
22
3- # Configuration
43CONTENT_DIR=" content"
54EXIT_CODE=0
65
#######################################
# Print a Markdown file with its fenced code blocks removed.
# A block opens on a line whose first non-blank characters are ``` or ~~~
# and closes only on a later line that starts with the SAME fence
# character; the other fence type inside an open block is block content.
# Arguments: $1 - path to the Markdown file
# Outputs:   every line outside a fenced block, to stdout (the fence
#            lines themselves are dropped too)
# Returns:   exit status of awk
#######################################
strip_fenced_code_blocks() {
  awk '
    # fence is the character that opened the current block, "" when outside
    BEGIN { fence = "" }
    /^[[:space:]]*(```|~~~)/ {
      text = $0
      sub(/^[[:space:]]*/, "", text)
      marker = substr(text, 1, 1)
      if (fence == "") {
        fence = marker        # opening fence
      } else if (fence == marker) {
        fence = ""            # matching closing fence
      }
      next                    # fence lines are never printed
    }
    fence == "" { print }
  ' "$1"
}
15-
166normalize_link () {
177 local link=" $1 "
188
19- # Remove anchor and query parameters
209 link=" ${link%%#* } "
2110 link=" ${link%% \? * } "
2211
23- # Remove trailing slash
2412 if [[ " $link " != " /" ]]; then
2513 link=" ${link%/ } "
2614 fi
@@ -31,70 +19,56 @@ normalize_link() {
3119check_internal_link () {
3220 local link=" $1 "
3321 local file=" $2 "
22+ local line_no=" $3 "
3423 local clean_link
3524 local target_path
3625
3726 clean_link=$( normalize_link " $link " )
3827
39- # Skip empty or anchor-only links
40- if [[ -z " $clean_link " || " $clean_link " == " #" ]]; then
41- return 0
42- fi
28+ [[ -z " $clean_link " || " $clean_link " == " #" ]] && return 0
4329
44- # Skip Hugo shortcodes
4530 if [[ " $clean_link " == " {{<" * || " $clean_link " == " {{%" * || " $clean_link " == " {{" * ]]; then
4631 return 0
4732 fi
4833
49- # Convert to lowercase for protocol checking (case-insensitive)
5034 local clean_link_lower=" ${clean_link,,} "
5135
52- # Skip external links (case-insensitive)
5336 if [[ " $clean_link_lower " == http://* || " $clean_link_lower " == https://* || " $clean_link_lower " == " //" * ]]; then
5437 return 0
5538 fi
5639
57- # Skip mailto, tel, javascript, data links
5840 case " $clean_link_lower " in
5941 mailto:* |tel:* |javascript:* |data:* )
6042 return 0
6143 ;;
6244 esac
6345
64- # Resolve target path based on link type
6546 if [[ " $clean_link " == /docs/* ]]; then
66- # Hugo path: /docs/* → content/en/docs/*
6747 target_path=" content/en${clean_link} "
6848 elif [[ " $clean_link " == /cn/docs/* ]]; then
69- # Hugo path: /cn/docs/* → content/cn/docs/*
7049 target_path=" content${clean_link} "
7150 elif [[ " $clean_link " == /* ]]; then
72- # Absolute root path: /* → content/en/*
7351 target_path=" content/en${clean_link} "
7452 else
75- # Relative link: resolve against file directory
76- local file_dir=$( dirname " $file " )
53+ local file_dir
54+ file_dir=$( dirname " $file " )
7755 target_path=" ${file_dir} /${clean_link} "
78-
79- # Normalize path (remove redundant ./ and resolve ../)
56+
8057 while [[ " $target_path " == * " /./" * ]]; do
8158 target_path=" ${target_path// \/ .\/ / \/ } "
8259 done
83-
84- # Basic .. resolution
60+
8561 while [[ " $target_path " =~ ([^/]+/\.\. /? ) ]]; do
8662 target_path=" ${target_path/ ${BASH_REMATCH[0]} / } "
8763 done
8864 fi
8965
90- # Check asset files first (non-markdown extensions)
9166 case " $clean_link_lower " in
9267 * .png|* .jpg|* .jpeg|* .svg|* .gif|* .xml|* .yaml|* .yml|* .json|* .css|* .js|* .pdf|* .zip|* .tar.gz)
9368 [[ -f " $target_path " ]] && return 0
9469 ;;
9570 esac
9671
97- # Check for markdown file existence variations
9872 if [[ -f " ${target_path} .md" ]]; then
9973 return 0
10074 elif [[ -f " $target_path " ]]; then
@@ -105,26 +79,51 @@ check_internal_link() {
10579 return 0
10680 fi
10781
108- echo " Error: Broken link in $file "
82+ echo " Error: Broken link"
83+ echo " File: $file :$line_no "
10984 echo " Link: $link "
11085 echo " Target: $target_path (and variants)"
11186 EXIT_CODE=1
11287}
11388
11489echo " Starting link validation..."
11590
116- # Find all markdown files and verify links
# Walk every Markdown file under CONTENT_DIR and validate its inline links.
# For each file:
#   Pass 1 records which line numbers belong to fenced code blocks.
#   Pass 2 extracts "](url)" matches with their original line numbers
#          (grep -n -o prints "LINE:MATCH") and checks those that are
#          not on a recorded code-block line.
while IFS= read -r FILE; do
  # CODE_LINES[n]=1 marks line n as a fence line or as fenced content.
  declare -A CODE_LINES=()
  fence_re='^[[:space:]]*(```|~~~)'
  fence_char=""   # character that opened the current block; empty outside
  line_no=0

  # Pass 1: mark fenced code block lines.
  # The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line_no=$(( line_no + 1 ))
    if [[ "$line" =~ $fence_re ]]; then
      marker="${BASH_REMATCH[1]:0:1}"
      if [[ -z "$fence_char" ]]; then
        fence_char="$marker"            # opening fence
      elif [[ "$fence_char" == "$marker" ]]; then
        fence_char=""                   # matching closing fence
      fi
      # A fence of the other type inside an open block is plain content.
      CODE_LINES[$line_no]=1
    elif [[ -n "$fence_char" ]]; then
      CODE_LINES[$line_no]=1
    fi
  done < "$FILE"

  # Pass 2: extract links with original line numbers.
  while IFS= read -r MATCH; do
    if [[ -z "$MATCH" ]]; then
      continue
    fi

    # Split "LINE:](url)" at the first colon.
    LINE_NO="${MATCH%%:*}"
    LINK_PART="${MATCH#*:}"

    # Links on code-block lines are examples, not real references.
    if [[ -n "${CODE_LINES[$LINE_NO]:-}" ]]; then
      continue
    fi

    # Reduce "](url)" to the bare url.
    LINK="${LINK_PART#*](}"
    LINK="${LINK%)}"

    check_internal_link "$LINK" "$FILE" "$LINE_NO"
  done < <(grep -n -oE '\]\([^)]+\)' "$FILE")

  unset CODE_LINES
done < <(find "$CONTENT_DIR" -type f -name "*.md")
129128
130129if [[ $EXIT_CODE -eq 0 ]]; then
0 commit comments