Skip to content

Commit 4f2bae0

Browse files
committed
Update CI-Workflow-Update, Requirements und MkDocs-Sync
This pull request updates the CI workflow configuration, refreshes the `requirements.txt` file with potentially new or updated dependencies, and synchronizes the `mkdocs.yml` configuration files across different language versions (likely `de` and `en`) to ensure consistency in the documentation build process. - Add "Validate internal links in Markdown" runner - Add "Validate image links in Markdown" runner - Remove obsolete Md-files and structure problems - Image replaced by default theme color (default is the theme “Light”, not “Dark-Mode”) - Fix Options in de docs - Synchronization of the installation process German - English - Fixing links... links... and more links... :-()
1 parent e2dd14c commit 4f2bae0

27 files changed

Lines changed: 428 additions & 90 deletions

File tree

.github/scripts/validate_images.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import re
2+
import os
3+
import sys
4+
import requests
5+
import time
6+
7+
# Documentation roots to scan, keyed by their language label.
DOCS_DIRS = dict(
    de=os.path.join("docs", "de"),
    en=os.path.join("docs", "en"),
)

# Per-request timeout for external image checks (seconds).
HTTP_TIMEOUT = 3

# Retry policy: number of attempts and pause between attempts (seconds).
MAX_RETRIES = 3
RETRY_DELAY = 3

# Browser-like headers sent with every external request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9,de;q=0.8",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",
    "Referer": "https://www.google.com/",
    "Cache-Control": "max-age=0",
}
32+
33+
def find_markdown_files(base_dirs):
    """Collect every ``.md`` file below the given documentation roots.

    Args:
        base_dirs: Mapping of language label to the root directory to scan.

    Returns:
        A list of ``(lang, path)`` tuples. Roots are visited in mapping
        order; inside a root, directories and files are visited in sorted
        order so that the result (and any report built from it) is
        reproducible across file systems.
    """
    md_files = []
    for lang, base_dir in base_dirs.items():
        # isdir instead of exists: a regular file at this path cannot be
        # walked and would otherwise be skipped without any warning.
        if not os.path.isdir(base_dir):
            print(f"⚠️ Warning: Directory '{base_dir}' [{lang}] does not exist. Skipping.")
            continue
        for root, dirs, files in os.walk(base_dir):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            md_files.extend(
                (lang, os.path.join(root, name))
                for name in sorted(files)
                if name.endswith(".md")
            )
    return md_files
44+
45+
def extract_image_paths_with_line_numbers(md_file):
    """Return the targets of all inline Markdown images in a file.

    Only the ``![alt](target)`` form on a single line is recognised. An
    optional quoted title (``![alt](img.png "Title")``) and an optional
    ``<...>`` wrapper around the target are removed, so that the returned
    value is the bare path or URL rather than the raw text in parentheses.

    Args:
        md_file: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of ``(line_number, target)`` tuples in document order, with
        line numbers starting at 1.
    """
    image_pattern = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')
    # "target" followed by whitespace and a single- or double-quoted title.
    title_pattern = re.compile(r'''^(.+?)\s+(?:"[^"]*"|'[^']*')$''')

    image_paths_with_lines = []
    with open(md_file, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            for raw_target in image_pattern.findall(line):
                target = raw_target.strip()
                titled = title_pattern.match(target)
                if titled:
                    target = titled.group(1).strip()
                # Angle brackets only delimit the destination; they are not
                # part of the path itself.
                if len(target) > 1 and target.startswith('<') and target.endswith('>'):
                    target = target[1:-1].strip()
                image_paths_with_lines.append((line_num, target))

    return image_paths_with_lines
56+
57+
def validate_external_image(url, session):
    """Check that an external image URL answers with a non-error status.

    A HEAD request is tried first. If the server answers with a status that
    can mean "HEAD is rejected here" (400, 403, 405, 501), the check is
    repeated with a streamed GET so that only the status line and headers
    are fetched, never the image body. Timeouts and other request errors
    are retried up to ``MAX_RETRIES`` times, pausing ``RETRY_DELAY`` seconds
    between attempts (but not after the last one).

    Args:
        url: Absolute ``http://`` or ``https://`` URL of the image.
        session: Session object used to issue the requests; its ``head``
            and ``get`` methods are called.

    Returns:
        ``(None, final_url)`` when the final status is below 400, otherwise
        ``(description, None)``. The description is ``"HTTP <status>"`` for
        an error status, ``'timeout'`` when the last attempt timed out, and
        ``"request failed: <error>"`` for any other request error.
    """
    head_fallback_statuses = (400, 403, 405, 501)
    last_error = 'timeout'

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            resp = session.head(url, timeout=HTTP_TIMEOUT, allow_redirects=True)
            if resp.status_code in head_fallback_statuses:
                print(f"⚠️ HEAD request for {url} failed with status {resp.status_code}. Trying GET request...")
                resp.close()
                # stream=True defers the body download; the status is enough.
                resp = session.get(url, timeout=HTTP_TIMEOUT, allow_redirects=True, stream=True)
                print(f"GET Response: {resp.status_code}")
            status_code, final_url = resp.status_code, resp.url
            # Release the connection back to the pool without reading a body.
            resp.close()
        except requests.exceptions.Timeout:
            last_error = 'timeout'
            print(f"⚠️ Timeout while accessing {url}. Retrying ({attempt}/{MAX_RETRIES})...")
        except requests.exceptions.RequestException as e:
            # Keep the real cause so it is not reported as a timeout later.
            last_error = f"request failed: {e}"
            print(f"⚠️ Error while accessing {url}: {str(e)}. Retrying ({attempt}/{MAX_RETRIES})...")
        else:
            if status_code == 429:
                print(f"⚠️ Rate-limited while accessing {url}. Skipping retries...")
            if status_code >= 400:
                print(f"⚠️ Error while accessing {url}: HTTP {status_code}")
                return (f"HTTP {status_code}", None)
            return (None, final_url)

        # Sleeping after the final attempt would only slow the run down.
        if attempt < MAX_RETRIES:
            time.sleep(RETRY_DELAY)

    return (last_error, None)
86+
87+
def validate_image_paths(md_files, check_external=True):
    """Validate every image referenced by the given Markdown files.

    External (``http://`` / ``https://``) images are checked over the
    network when ``check_external`` is true; each distinct URL is requested
    only once and the result is reused for further occurrences. All other
    targets are resolved relative to the directory of the Markdown file and
    must exist on disk. Before the lookup, a ``?query`` or ``#fragment``
    suffix is removed and percent-escapes are decoded, because these are
    part of the reference but not of the file name. Inline ``data:`` images
    are skipped since there is nothing on disk to check.

    Args:
        md_files: Iterable of ``(lang, md_file_path)`` tuples.
        check_external: When false, external URLs are not requested.

    Returns:
        A list of ``(lang, md_file, line_num, path, description)`` tuples,
        one per failing reference; ``path`` is the target exactly as it is
        written in the Markdown source.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import unquote

    issues = []
    # URL -> error description (None when reachable).
    external_results = {}

    # The context manager closes pooled connections when validation ends.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        for lang, md_file in md_files:
            base_dir = os.path.dirname(md_file)
            for line_num, path in extract_image_paths_with_line_numbers(md_file):
                if path.startswith(("http://", "https://")):
                    if check_external:
                        if path not in external_results:
                            error_desc, _ = validate_external_image(path, session)
                            external_results[path] = error_desc
                        if external_results[path]:
                            issues.append((lang, md_file, line_num, path, external_results[path]))
                    continue

                if path.startswith("data:"):
                    continue

                # Drop "#fragment" / "?query" and decode %XX escapes; fall
                # back to the raw text if nothing is left after stripping.
                local_part = unquote(path.split('#', 1)[0].split('?', 1)[0]) or path
                abs_path = os.path.normpath(os.path.join(base_dir, local_part))
                if not os.path.exists(abs_path):
                    issues.append((lang, md_file, line_num, path, 'local image missing'))

    return issues
107+
108+
if __name__ == "__main__":
    # Announce which documentation roots are about to be scanned.
    scanned_roots = ", ".join(f"{label} ({root})" for label, root in DOCS_DIRS.items())
    print("🔍 Scanning markdown files for images in: " + scanned_roots)

    md_files = find_markdown_files(DOCS_DIRS)
    if not md_files:
        # Nothing to validate is treated as success.
        print("⚠️ No Markdown files found in specified directories.")
        sys.exit(0)

    issues = validate_image_paths(md_files, check_external=True)

    print("\n🔎 Validation Results:")

    if not issues:
        print("\n✅ All referenced images (local & external) exist and are valid.")
        print("\n✅ Validation successful: no issues found.\n")
        sys.exit(0)

    # At least one broken reference: list them all and exit non-zero.
    print("\n❌ Image issues found:")
    for lang, md_file, line_num, img_path, description in issues:
        print(f" [{lang}] In file '{md_file}' at line {line_num}: Image '{img_path}' failed ({description})")
    print(f"\n❌ Validation failed: {len(issues)} issue(s) found.\n")
    sys.exit(1)
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import re
2+
import os
3+
import sys
4+
import requests
5+
import time
6+
7+
# Documentation roots to scan, keyed by their language label.
DOCS_DIRS = dict(
    de=os.path.join("docs", "de"),
    en=os.path.join("docs", "en"),
)

# Per-request timeout for external link checks (seconds).
HTTP_TIMEOUT = 3

# Retry policy: number of attempts and pause between attempts (seconds).
MAX_RETRIES = 3
RETRY_DELAY = 3

# Browser-like headers sent with every external request; the extra entries
# are meant to get the checker past Cloudflare-style bot filtering.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9,de;q=0.8",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",  # Do Not Track
    "Referer": "https://www.google.com/",  # generic referer
    "Cache-Control": "max-age=0",
}
32+
33+
def find_markdown_files(base_dirs):
    """Collect every ``.md`` file below the given documentation roots.

    Args:
        base_dirs: Mapping of language label to the root directory to scan.

    Returns:
        A list of ``(lang, path)`` tuples. Roots are visited in mapping
        order; inside a root, directories and files are visited in sorted
        order so that the result (and any report built from it) is
        reproducible across file systems.
    """
    md_files = []
    for lang, base_dir in base_dirs.items():
        # isdir instead of exists: a regular file at this path cannot be
        # walked and would otherwise be skipped without any warning.
        if not os.path.isdir(base_dir):
            print(f"⚠️ Warning: Directory '{base_dir}' [{lang}] does not exist. Skipping.")
            continue
        for root, dirs, files in os.walk(base_dir):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            md_files.extend(
                (lang, os.path.join(root, name))
                for name in sorted(files)
                if name.endswith(".md")
            )
    return md_files
44+
45+
def extract_links_with_line_numbers(md_file):
    """Return the external links and ``[#anchor]`` references in a file.

    Two forms are recognised, each on a single line:

    * ``[text](http(s)://url)`` -- the URL may contain one level of balanced
      parentheses and may be followed by a quoted title, which is dropped.
    * ``[#anchor]`` -- reported as ``#anchor`` with an empty link text.

    NOTE(review): relative links such as ``[text](page.md)`` and
    ``[text](#anchor)`` are not matched by either form, so they are never
    returned to the caller -- confirm whether that is intended.

    Args:
        md_file: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of ``(line_number, link_text, target)`` tuples in document
        order, with line numbers starting at 1.
    """
    # The URL part stops at whitespace or an unbalanced ")" so that two
    # adjacent links without a space between them are not merged into one.
    pattern = re.compile(
        r'\[([^\]]+)\]'
        r'\(\s*(https?://(?:[^\s()]|\([^\s()]*\))+)'
        r'''(?:\s+(?:"[^"]*"|'[^']*'))?\s*\)'''
        r'|\[#([^\])]+)\]',
        re.IGNORECASE,
    )

    links_with_lines = []
    with open(md_file, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            for link_text, path, anchor in pattern.findall(line):
                if path:
                    # External link
                    links_with_lines.append((line_num, link_text, path))
                elif anchor:
                    # Anchor reference
                    links_with_lines.append((line_num, link_text, f'#{anchor}'))

    return links_with_lines
61+
62+
def validate_external_link(path):
    """Check that an external URL answers with a non-error status.

    A single streamed GET is issued per attempt, so only the status line
    and headers are fetched and the body is never downloaded. An error
    status is final: repeating the identical request would not change the
    answer. Timeouts and other request errors are retried up to
    ``MAX_RETRIES`` times, pausing ``RETRY_DELAY`` seconds between attempts
    (but not after the last one).

    Args:
        path: Absolute ``http://`` or ``https://`` URL to check.

    Returns:
        ``(None, final_url)`` when the status is below 400, otherwise
        ``(description, None)``. The description is ``"HTTP <status>"`` for
        an error status, ``'timeout'`` when the last attempt timed out, and
        ``"request failed: <error>"`` for any other request error.
    """
    last_error = 'timeout'

    # Both context managers release their connections on every exit path.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                with session.get(path, timeout=HTTP_TIMEOUT, allow_redirects=True, stream=True) as resp:
                    status_code, final_url = resp.status_code, resp.url
            except requests.exceptions.Timeout:
                last_error = 'timeout'
                print(f"⚠️ Timeout while accessing {path}. Retrying ({attempt}/{MAX_RETRIES})...")
            except requests.exceptions.RequestException as e:
                # Keep the real cause so it is not reported as a timeout later.
                last_error = f"request failed: {e}"
                print(f"⚠️ Error while accessing {path}: {str(e)}. Retrying ({attempt}/{MAX_RETRIES})...")
            else:
                if status_code >= 400:
                    print(f"⚠️ Error while accessing {path}: HTTP {status_code}")
                    if status_code == 429:
                        print(f"⚠️ Rate-limited while accessing {path}. Skipping retries due to CI runner...")
                    return (f"HTTP {status_code}", None)
                # Successful responses are not logged.
                return (None, final_url)

            # Sleeping after the final attempt would only slow the run down.
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

    return (last_error, None)
98+
99+
def validate_links(md_files):
    """Validate the links extracted from the given Markdown files.

    Each extracted target is handled by kind:

    * ``http://`` / ``https://`` -- checked over the network; every distinct
      URL is requested once and the result reused for later occurrences.
    * ``#anchor`` -- must occur as text in the same file.
    * anything else -- treated as a path relative to the Markdown file's
      directory (ignoring any ``?query`` / ``#fragment`` suffix) that must
      exist on disk.

    Args:
        md_files: Iterable of ``(lang, md_file_path)`` tuples.

    Returns:
        A list of ``(lang, md_file, line_num, path, description)`` tuples,
        one per failing link; ``path`` is the target as it was extracted.
    """
    issues = []
    # URL -> error description (None when reachable).
    external_results = {}

    for lang, md_file in md_files:
        links_with_lines = extract_links_with_line_numbers(md_file)
        # Read lazily and at most once per file, only if an anchor needs it.
        file_content = None

        for line_num, _link_text, path in links_with_lines:
            if path.startswith(("http://", "https://")):
                if path not in external_results:
                    error_desc, _ = validate_external_link(path)
                    external_results[path] = error_desc
                if external_results[path]:
                    issues.append((lang, md_file, line_num, path, external_results[path]))
            elif path.startswith("#"):
                if file_content is None:
                    with open(md_file, 'r', encoding='utf-8') as f:
                        file_content = f.read()
                # NOTE(review): this is a plain substring test, and the
                # reference that produced `path` presumably contains the
                # same text, so the test may never fail -- confirm what
                # should count as the anchor's target.
                if path not in file_content:
                    issues.append((lang, md_file, line_num, path, 'anchor link missing'))
            else:
                # A "?query" or "#fragment" is not part of the file name.
                local_part = path.split('#', 1)[0].split('?', 1)[0] or path
                abs_path = os.path.normpath(os.path.join(os.path.dirname(md_file), local_part))
                if not os.path.exists(abs_path):
                    issues.append((lang, md_file, line_num, path, 'internal link missing'))

    return issues
126+
127+
if __name__ == "__main__":
    # Announce which documentation roots are about to be scanned.
    scanned_roots = ", ".join(f"{label} ({root})" for label, root in DOCS_DIRS.items())
    print("🔍 Scanning markdown files for internal and external links in: " + scanned_roots)

    md_files = find_markdown_files(DOCS_DIRS)
    if not md_files:
        # Nothing to validate is treated as success.
        print("⚠️ No Markdown files found in specified directories.")
        sys.exit(0)

    issues = validate_links(md_files)

    print("\n🔎 Validation Results:")

    if not issues:
        print("\n✅ All internal and external links are valid.")
        print("\n✅ Validation successful: no issues found.\n")
        sys.exit(0)

    # At least one broken link: list them all and exit non-zero.
    print("\n❌ Link issues found:")
    for lang, md_file, line_num, link_path, description in issues:
        print(f" [{lang}] In file '{md_file}' at line {line_num}: Link '{link_path}' failed ({description})")
    print(f"\n❌ Validation failed: {len(issues)} issue(s) found.\n")
    sys.exit(1)

.github/workflows/main.yml

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,55 @@ name: CI
33
on:
44
push:
55
branches:
6-
- master
6+
- master
7+
pull_request:
8+
branches:
9+
- master
10+
types: [opened, synchronize, reopened]
711

812
jobs:
9-
build-deploy:
13+
build-and-deploy-documentation:
1014
runs-on: ubuntu-latest
1115
steps:
12-
- uses: actions/checkout@v3
16+
- uses: actions/checkout@v4
1317
with:
1418
fetch-depth: 0
19+
1520
- name: Set up Python
16-
uses: actions/setup-python@v4
21+
uses: actions/setup-python@v5
1722
with:
1823
python-version: 3.11
19-
- name: Caching dependencies
24+
25+
- name: Cache Python dependencies
2026
uses: actions/cache@v3
2127
with:
2228
key: ${{ github.ref }}
2329
path: .cache
24-
- name: Install dependencies
30+
31+
- name: Install Python dependencies
2532
run: |
2633
python -m pip install --upgrade pip
2734
pip install -r requirements.txt
28-
- name: Build English
35+
pip install requests
36+
37+
- name: Validate internal links in Markdown
38+
run: python .github/scripts/validate_internal_links.py
39+
40+
- name: Validate image links in Markdown
41+
run: python .github/scripts/validate_images.py
42+
43+
- name: Build English Documentation
2944
run: mkdocs build -f config/en/mkdocs.yml
30-
- name: Build German
45+
46+
- name: Build German Documentation
3147
run: mkdocs build -f config/de/mkdocs.yml
48+
3249
- name: Copy common root files (including legacy redirects)
3350
run: cp -R docs/root/* site
34-
- name: Deploy
51+
52+
- name: Deploy to GitHub Pages
53+
if: github.ref == 'refs/heads/master' && success()
3554
uses: peaceiris/actions-gh-pages@v3
36-
if: success()
3755
with:
3856
github_token: ${{ secrets.GITHUB_TOKEN }}
3957
publish_dir: ./site

.project

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>portfolio-help</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
</buildSpec>
9+
<natures>
10+
</natures>
11+
</projectDescription>

0 commit comments

Comments
 (0)