Commit 0dcd249

a script to prevent this happening in future
1 parent d08105d commit 0dcd249

1 file changed: +143 lines added, 0 removed

Diff for: scripts/link-check.py (new file, +143 lines)

@@ -0,0 +1,143 @@
import asyncio
import re
import time
import argparse
from collections import defaultdict
from aiohttp import ClientSession
from pathlib import Path
from typing import List, Set, Tuple

# ANSI color codes
RESET = "\033[0m"
GREEN = "\033[32m"
RED = "\033[31m"
ORANGE = "\033[33m"
WHITE = "\033[37m"

# User-Agent to mimic a browser
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.5790.102 Safari/537.36"
HEADERS = {"User-Agent": USER_AGENT}

# Simple URL Regex to capture valid URLs only
URL_REGEX = r'https?://[^\s\'"<>]+'

# Dictionary to store results
results = defaultdict(list)

# Set to track tested URLs
tested_urls: Set[str] = set()

# Counters for performance
good_links = 0
bad_links = 0

async def fetch(url: str, session: ClientSession, filename: str, line_number: int) -> None:
    global good_links, bad_links
    try:
        async with session.get(url, timeout=10) as response:
            status = response.status
            # Treat 403 as valid (so it's "good")
            if status == 200:
                color = GREEN
                good_links += 1
            elif status == 403:
                color = GREEN  # 403 is OK for us
                good_links += 1
            else:
                color = RED
                bad_links += 1
            results[filename].append((url, status, color, line_number))
    except Exception as e:
        results[filename].append((url, f"Error: {str(e)}", RED, line_number))
        bad_links += 1

async def check_urls_in_file(filename: str, urls_with_lines: List[Tuple[str, int]], session: ClientSession) -> None:
    tasks = []
    for url, line_number in urls_with_lines:
        if url not in tested_urls:
            tested_urls.add(url)  # Add URL to the set to avoid re-testing
            tasks.append(fetch(url, session, filename, line_number))
    await asyncio.gather(*tasks)

async def process_files(files: List[str]) -> None:
    async with ClientSession(headers=HEADERS) as session:
        tasks = []
        for file in files:
            urls_with_lines = extract_urls_from_file(file)
            if urls_with_lines:
                tasks.append(check_urls_in_file(file, urls_with_lines, session))
        await asyncio.gather(*tasks)

def extract_urls_from_file(filepath: str) -> List[Tuple[str, int]]:
    """Extract all URLs and their line numbers from the given file."""
    urls_with_lines = []
    with open(filepath, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, 1):
            urls_in_line = re.findall(URL_REGEX, line)
            for url in urls_in_line:
                urls_with_lines.append((url, line_number))
    return urls_with_lines

def collect_files(extensions: List[str]) -> List[str]:
    """Collect files with the given extensions from the current directory."""
    files = []
    for ext in extensions:
        files.extend(Path(".").rglob(f"*.{ext}"))
    return [str(file) for file in files]

def print_results(verbose: bool, errors_only: bool) -> None:
    """Print results in a tree-like structure based on verbosity level."""
    for filename, url_statuses in results.items():
        for url, status, color, line_number in url_statuses:
            if errors_only and status == 200:
                continue  # Skip successful links when printing errors-only

            if verbose or (errors_only and status != 200):
                print(f"\n{ORANGE}{filename}:{line_number}{RESET}")
                print(f"  {color}{status}{RESET} {WHITE}{url}{RESET}")

def parse_arguments() -> argparse.Namespace:
    """Parse command-line arguments for verbosity control."""
    parser = argparse.ArgumentParser(description="Check URLs in project files.")
    parser.add_argument(
        "--verbose", action="store_true", help="Print all URLs and their statuses (success and error)"
    )
    parser.add_argument(
        "--errors-only", action="store_true", help="Print only error URLs"
    )
    parser.add_argument(
        "--silent", action="store_true", help="Print nothing, exit 0 for no bad links, 1 otherwise"
    )
    return parser.parse_args()

def main() -> None:
    args = parse_arguments()

    # Start performance counter
    start_time = time.perf_counter()

    # Specify file extensions to search for
    extensions = ["rs", "wgsl", "md", "txt", "toml", "py", "sh"]
    files = collect_files(extensions)

    # Run asyncio event loop
    asyncio.run(process_files(files))

    # Print results based on verbosity level
    if not args.silent:
        print_results(args.verbose, args.errors_only)

    # End performance counter
    end_time = time.perf_counter()
    total_time = end_time - start_time

    # If in silent mode, exit with code 0 if no bad links, or 1 if bad links exist
    if args.silent:
        exit(0 if bad_links == 0 else 1)

    # Summary
    print(f"\nSummary: {good_links} good links, {bad_links} bad links.")
    print(f"Total time: {total_time:.2f} seconds.")

if __name__ == "__main__":
    main()
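For reference, the script scans from the current directory (Path(".").rglob), so it is intended to be run from the repository root. Based on the flags defined in parse_arguments and the exit-code behavior in main, typical invocations would look like the following; the exact CI wiring is an assumption, not part of this commit:

    # Show every link and its status (200/403 in green, everything else in red)
    python scripts/link-check.py --verbose

    # Show only links that failed (non-200 responses or request errors)
    python scripts/link-check.py --errors-only

    # CI-style gate: no output, exit code 0 if all links are good, 1 otherwise
    python scripts/link-check.py --silent

The --silent mode's exit code is what lets this act as an automated check, matching the commit's intent of preventing broken links from slipping in again.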
