Skip to content

Commit dcbe27f

Browse files
authored
[Feat]:add a new script to check deadlink (#2346)
1 parent b4ff9f8 commit dcbe27f

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed

check_move_global.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import argparse
2+
import subprocess
3+
import re
4+
import os
5+
import sys
6+
from typing import AnyStr, List
7+
from urllib.parse import urlparse
8+
9+
# Module-level state shared by the functions in this script.

# Directories (relative to the working directory) that are scanned for
# Markdown files and used for the global fallback search.
search_dirs = ["docs", "i18n", "versioned_docs"]

# Set to True as soon as any link problem is reported or repaired; the script
# entry point turns it into the process exit status.
change_detected = False

# (old_path, new_path) rename pairs and deleted paths of the inspected commit;
# both are replaced by extract_file_changes().
move_pairs, deletes = [], []
13+
14+
def is_same_file(path1, path2):
    """Return True when both paths normalize to the same string.

    The comparison is purely textual (``os.path.normpath``); the filesystem
    is not consulted, so neither path has to exist.
    """
    first, second = (os.path.normpath(candidate) for candidate in (path1, path2))
    return first == second
16+
17+
def remove_suffix(text: str, suffix: str):
    """Return ``text`` without a trailing ``suffix``.

    ``text`` is returned unchanged when it does not end with ``suffix`` or
    when ``suffix`` is empty.

    Args:
        text: String to strip.
        suffix: Ending to remove, at most once.

    Returns:
        The stripped string.
    """
    # Every string "ends with" the empty suffix, and text[:-0] is text[:0],
    # i.e. "", so an empty suffix must be rejected before slicing.
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text
21+
22+
def find_nearest_file(file_base, start_dir):
    """Locate ``file_base`` with a ``.md`` or ``.mdx`` extension.

    The search first climbs from ``start_dir`` towards the filesystem root
    (``.md`` is preferred over ``.mdx`` in each directory). If nothing is
    found it falls back to walking every directory in ``search_dirs``.

    Args:
        file_base: File name without extension.
        start_dir: Directory in which the upward search begins.

    Returns:
        Path of the first match, or None when there is no match or
        ``file_base`` is empty.
    """
    if not file_base:
        # An empty base name would only ever match a file literally called
        # ".md" / ".mdx", which is never a meaningful link target.
        return None

    file_names = (file_base + ".md", file_base + ".mdx")

    cur_dir = start_dir
    # Climb at most 10 levels so the loop always terminates.
    for _ in range(10):
        for name in file_names:
            candidate = os.path.join(cur_dir, name)
            if os.path.exists(candidate):
                return candidate
        parent = os.path.dirname(cur_dir)
        if parent == cur_dir:
            break
        cur_dir = parent

    # Global search. Directory and file names are visited in sorted order so
    # that, when several files share the base name, the chosen one does not
    # depend on the order in which the filesystem lists entries.
    for base_dir in search_dirs:
        for root, dirs, files in os.walk(base_dir):
            dirs.sort()
            for name in sorted(files):
                if name in file_names:
                    return os.path.join(root, name)
    return None
45+
46+
def _split_link_target(link):
    """Split ``link`` into ``(path, tail)`` at the first ``#`` or ``?``.

    ``tail`` keeps its leading marker, so ``path + tail == link``.
    """
    cut = len(link)
    for marker in ("#", "?"):
        pos = link.find(marker)
        if pos != -1:
            cut = min(cut, pos)
    return link[:cut], link[cut:]


def process_md_file(file_path):
    """Check the relative Markdown links in ``file_path`` and repair what it can.

    For every ``[text](target)`` whose target has no URL scheme and is not an
    absolute path:

    * a target matching the old path of a rename in ``move_pairs`` is
      rewritten to the new location;
    * a target matching an entry of ``deletes`` is reported;
    * a target that does not exist on disk is replaced by the nearest file
      with the same base name (see ``find_nearest_file``), or reported when
      no such file is found.

    A ``#fragment`` / ``?query`` tail is ignored while resolving the target
    and re-attached to the rewritten link. The file is rewritten only when
    its content changed. ``change_detected`` is set whenever something is
    reported or rewritten.

    Args:
        file_path: Path of the Markdown file to process; relative link
            targets are resolved against its directory.
    """
    global change_detected

    link_pattern = re.compile(r"\[.*?\]\((.*?)\)")
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    doc_exts = (".md", ".mdx")
    base_dir = os.path.dirname(file_path)
    # os.path.relpath() rejects an empty start directory.
    rel_start = base_dir or os.curdir
    new_content = content

    # dict.fromkeys drops repeated targets while keeping first-seen order, so
    # each distinct link is handled and reported once.
    for link in dict.fromkeys(link_pattern.findall(content)):
        if urlparse(link).scheme or os.path.isabs(link):
            continue

        target, tail = _split_link_target(link)
        if not target:
            # Pure in-page anchor ("#section") or empty target: there is no
            # file to check.
            continue

        resolved = os.path.normpath(os.path.join(base_dir, target))
        full_path = resolved if resolved.endswith(doc_exts) else resolved + ".md"

        # Renamed files.
        handled_by_rename = False
        for from_path, to_path in move_pairs:
            from_base, from_ext = os.path.splitext(from_path)
            to_base, to_ext = os.path.splitext(to_path)
            # Same path apart from the extension: skipped, presumably because
            # links are written without the extension and so stay valid.
            if (from_ext in (".md", ".mdx", "") or to_ext in (".md", ".mdx", "")) and (
                from_base == to_base
            ):
                continue

            if is_same_file(full_path, from_path):
                relative_to_path = os.path.relpath(to_path, rel_start)
                relative_to_path = remove_suffix(relative_to_path, ".md")
                relative_to_path = remove_suffix(relative_to_path, ".mdx")
                new_link = relative_to_path + tail
                print(f"🔄 {file_path}: Updated moved link {link} -> {new_link}")
                new_content = new_content.replace(f"({link})", f"({new_link})")
                change_detected = True
                handled_by_rename = True

        # Deleted files.
        for deleted_path in deletes:
            if is_same_file(full_path, deleted_path):
                print(f"⚠️ {file_path}: Link to deleted file {link}")
                change_detected = True

        # Dead-link repair.
        if handled_by_rename:
            # The old path is gone by definition; the link was already
            # rewritten above, so it must not be reported a second time.
            continue

        target_exists = os.path.exists(full_path)
        if not target_exists and not resolved.endswith(doc_exts):
            # An extension-less link may point at an .mdx document, or at an
            # existing asset or directory rather than a Markdown file.
            target_exists = os.path.exists(resolved + ".mdx") or os.path.exists(resolved)
        if target_exists:
            continue

        file_base = os.path.basename(target)
        file_base = remove_suffix(file_base, ".md")
        file_base = remove_suffix(file_base, ".mdx")

        found_path = find_nearest_file(file_base, base_dir)
        if found_path:
            relative_to_path = os.path.relpath(found_path, rel_start)
            relative_to_path = remove_suffix(relative_to_path, ".md")
            relative_to_path = remove_suffix(relative_to_path, ".mdx")
            new_link = relative_to_path + tail
            print(f"🛠️ {file_path}: Fixed broken link {link} -> {new_link}")
            new_content = new_content.replace(f"({link})", f"({new_link})")
        else:
            print(f"❌ {file_path}: Could not fix broken link {link}")
        change_detected = True

    if new_content != content:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(new_content)
105+
106+
def extract_file_changes(git_show_output: List[AnyStr]):
    """Parse ``git show`` output and record renamed and deleted files.

    The result is stored in the module globals ``move_pairs`` (a list of
    ``(old_path, new_path)`` tuples) and ``deletes`` (a list of paths).

    Args:
        git_show_output: Lines of ``git show`` output, each still carrying
            its line terminator. Either all ``bytes`` or all ``str``.
    """
    global move_pairs
    global deletes

    print("Parsing commit lines...")
    lines = list(git_show_output)
    if lines and isinstance(lines[0], str):
        content = "".join(lines)
    else:
        # A diff can carry bytes in any encoding; undecodable bytes are
        # replaced instead of aborting the run. Only the header lines matter.
        content = b"".join(lines).decode("utf-8", errors="replace")

    # Both patterns are anchored to line starts and used without re.DOTALL so
    # that "." never crosses a newline. With DOTALL the greedy "index .+"
    # swallowed the rest of the output and only the first deletion was found.
    move_pattern = r"^rename from (.+)\nrename to (.+)$"
    move_matches = re.findall(move_pattern, content, re.MULTILINE)
    print(f"Moved files detected: {len(move_matches)}")

    delete_pattern = r"^diff --git a/(.+) b/\1\ndeleted file mode \d+\nindex .+$"
    delete_matches = re.findall(delete_pattern, content, re.MULTILINE)
    print(f"Deleted files detected: {len(delete_matches)}")

    move_pairs = move_matches
    deletes = delete_matches
122+
123+
def travel(root_path: str):
    """Run ``process_md_file`` on every ``.md`` / ``.mdx`` file below ``root_path``.

    Args:
        root_path: Directory to walk recursively.
    """
    markdown_suffixes = (".md", ".mdx")
    for current_dir, _subdirs, names in os.walk(root_path):
        for name in names:
            if not name.endswith(markdown_suffixes):
                continue
            process_md_file(os.path.join(current_dir, name))
128+
129+
if __name__ == "__main__":
    # Usage: python check_move_global.py <commit_id>
    # Exit status: 0 = no issues, 1 = link issues detected and/or fixed,
    # 2 = the commit could not be read with git.
    parser = argparse.ArgumentParser(description="Fix moved/deleted/broken md links for a commit")
    parser.add_argument("commit_id", type=str, help="Git commit id to check")
    args = parser.parse_args()

    # The commit id comes from the command line, so git is invoked with an
    # argument list instead of a shell string. stderr is captured separately
    # so that error text is never parsed as diff output.
    try:
        result = subprocess.run(
            ["git", "show", args.commit_id],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError:
        print("git executable not found", file=sys.stderr)
        sys.exit(2)

    if result.returncode != 0:
        sys.stderr.write(result.stderr.decode("utf-8", errors="replace"))
        print(f"git show failed for commit {args.commit_id!r}", file=sys.stderr)
        sys.exit(2)

    # extract_file_changes expects the raw byte lines, terminators included.
    extract_file_changes(result.stdout.splitlines(keepends=True))

    for search_dir in search_dirs:
        travel(search_dir)

    if change_detected:
        print("❗ Link issues detected and/or fixed.")
        sys.exit(1)
    else:
        print("✅ No issues detected.")

0 commit comments

Comments
 (0)