portfolio-performance
diff --git a/‎.github/scripts/validate_images.py‎
Lines changed: 129 additions & 0 deletions b/‎.github/scripts/validate_images.py‎
Lines changed: 129 additions & 0 deletions
diff --git a/‎.github/scripts/validate_internal_links.py‎
Lines changed: 148 additions & 0 deletions b/‎.github/scripts/validate_internal_links.py‎
Lines changed: 148 additions & 0 deletions
diff --git a/‎.github/workflows/main.yml‎
Lines changed: 28 additions & 10 deletions b/‎.github/workflows/main.yml‎
Lines changed: 28 additions & 10 deletions
diff --git a/‎.project‎
Lines changed: 11 additions & 0 deletions b/‎.project‎
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,129 @@
+import re
+import os
+import sys
+import requests
+import time
+
+# Documentation roots to scan, keyed by language label. The paths are
+# relative, so they resolve against the current working directory.
+DOCS_DIRS = {
+    'de': os.path.join('docs', 'de'),
+    'en': os.path.join('docs', 'en'),
+}
+
+# Timeout in seconds passed to every HEAD/GET request for an external image.
+HTTP_TIMEOUT = 3
+
+# Retry settings: MAX_RETRIES is the total number of attempts made per URL,
+# RETRY_DELAY is the pause in seconds after an attempt raised an exception.
+MAX_RETRIES = 3
+RETRY_DELAY = 3
+
+# Browser-like request headers; they are installed on the requests.Session
+# that performs the external image checks.
+# NOTE(review): 'Accept-Encoding' advertises 'br'; presumably requests can only
+# decode Brotli bodies when an optional brotli package is installed - confirm.
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+    'Accept-Encoding': 'gzip, deflate, br',
+    'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
+    'Connection': 'keep-alive',
+    'Upgrade-Insecure-Requests': '1',
+    'DNT': '1',
+    'Referer': 'https://www.google.com/',
+    'Cache-Control': 'max-age=0',
+}
+
+def find_markdown_files(base_dirs):
+    """Collect every ``.md`` file below the given documentation roots.
+
+    Args:
+        base_dirs: Mapping of language label to the directory to scan.
+
+    Returns:
+        A list of ``(lang, file_path)`` tuples in ``os.walk`` order. Roots
+        that do not exist are reported with a warning and skipped.
+    """
+    collected = []
+    for lang, base_dir in base_dirs.items():
+        if os.path.exists(base_dir):
+            for current_dir, _subdirs, filenames in os.walk(base_dir):
+                collected.extend(
+                    (lang, os.path.join(current_dir, name))
+                    for name in filenames
+                    if name.endswith(".md")
+                )
+        else:
+            print(f"⚠️ Warning: Directory '{base_dir}' [{lang}] does not exist. Skipping.")
+    return collected
+
+def extract_image_paths_with_line_numbers(md_file):
+    """Return the image targets referenced in a Markdown file.
+
+    Only inline images of the form ``![alt](target)`` are recognised. An
+    optional quoted title (``![alt](img.png "Title")``) and surrounding angle
+    brackets (``![alt](<my pic.png>)``) are removed so that only the actual
+    path or URL is returned; previously they were kept as part of the path and
+    made existing images look missing.
+
+    Args:
+        md_file: Path of the Markdown file to scan (read as UTF-8).
+
+    Returns:
+        A list of ``(line_number, target)`` tuples, line numbers starting at 1,
+        in order of appearance.
+    """
+    image_pattern = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')
+    # A title is a quoted string separated from the target by whitespace.
+    title_suffix = re.compile(r'''\s+(?:"[^"]*"|'[^']*')$''')
+    image_paths_with_lines = []
+
+    with open(md_file, 'r', encoding='utf-8') as f:
+        for line_num, line in enumerate(f, 1):
+            for raw_target in image_pattern.findall(line):
+                target = title_suffix.sub('', raw_target.strip())
+                if target.startswith('<') and target.endswith('>'):
+                    target = target[1:-1]
+                image_paths_with_lines.append((line_num, target.strip()))
+
+    return image_paths_with_lines
+
+def validate_external_image(url, session):
+    """Check that an external image URL answers with a non-error status.
+
+    A HEAD request is sent first. When the server answers with a status that
+    often only means "HEAD is rejected" (400, 403, 405, 501), the check is
+    repeated with a streamed GET whose body is never downloaded. Requests that
+    raise are retried up to ``MAX_RETRIES`` attempts in total, pausing
+    ``RETRY_DELAY`` seconds between attempts (not after the last one).
+
+    Args:
+        url: The absolute http(s) URL of the image.
+        session: A ``requests.Session`` used for all requests.
+
+    Returns:
+        ``(None, final_url)`` on success, otherwise ``(description, None)``.
+        The description is ``"HTTP <status>"`` for an error status,
+        ``"timeout"`` when the last attempt timed out, or
+        ``"request failed: <ExceptionName>"`` for any other request error.
+    """
+    last_error = 'timeout'
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            resp = session.head(url, timeout=HTTP_TIMEOUT, allow_redirects=True)
+
+            if resp.status_code >= 400:
+                print(f"⚠️ Error while accessing {url}: HTTP {resp.status_code}")
+                if resp.status_code in (400, 403, 405, 501):
+                    print(f"⚠️ HEAD request for {url} failed with status {resp.status_code}. Trying GET request...")
+                    # stream=True fetches only status and headers; closing the
+                    # response right away avoids downloading the image body.
+                    resp = session.get(url, timeout=HTTP_TIMEOUT, allow_redirects=True, stream=True)
+                    resp.close()
+                    print(f"GET Response: {resp.status_code}")
+
+            if resp.status_code == 429:
+                print(f"⚠️ Rate-limited while accessing {url}. Skipping retries...")
+                return (f"HTTP {resp.status_code}", None)
+
+            if resp.status_code >= 400:
+                return (f"HTTP {resp.status_code}", None)
+
+            return (None, resp.url)
+
+        except requests.exceptions.Timeout:
+            last_error = 'timeout'
+            print(f"⚠️ Timeout while accessing {url}. Retrying ({attempt + 1}/{MAX_RETRIES})...")
+        except requests.exceptions.RequestException as e:
+            # Keep the real failure kind instead of reporting it as a timeout.
+            last_error = f"request failed: {type(e).__name__}"
+            print(f"⚠️ Error while accessing {url}: {str(e)}. Retrying ({attempt + 1}/{MAX_RETRIES})...")
+
+        # Waiting is only useful when another attempt follows.
+        if attempt + 1 < MAX_RETRIES:
+            time.sleep(RETRY_DELAY)
+
+    return (last_error, None)
+
+def validate_image_paths(md_files, check_external=True):
+    """Validate every image referenced by the given Markdown files.
+
+    External targets (``http://`` / ``https://``) are checked over the network
+    when ``check_external`` is true; each distinct URL is requested only once
+    and the result is reused for further occurrences. Every other target is
+    resolved relative to the directory of the Markdown file and must exist on
+    disk.
+
+    Args:
+        md_files: Iterable of ``(lang, md_file_path)`` tuples.
+        check_external: Whether external image URLs are requested at all.
+
+    Returns:
+        A list of ``(lang, md_file, line_num, path, description)`` tuples, one
+        per failing image reference.
+    """
+    issues = []
+    external_results = {}  # URL -> error description (None when reachable)
+
+    # The context manager closes the session's pooled connections on exit.
+    with requests.Session() as session:
+        session.headers.update(HEADERS)
+
+        for lang, md_file in md_files:
+            md_dir = os.path.dirname(md_file)
+            for line_num, path in extract_image_paths_with_line_numbers(md_file):
+                if path.startswith(("http://", "https://")):
+                    if check_external:
+                        if path not in external_results:
+                            error_desc, _ = validate_external_image(path, session)
+                            external_results[path] = error_desc
+                        error_desc = external_results[path]
+                        if error_desc:
+                            issues.append((lang, md_file, line_num, path, error_desc))
+                    continue
+
+                abs_path = os.path.normpath(os.path.join(md_dir, path))
+                if not os.path.exists(abs_path):
+                    issues.append((lang, md_file, line_num, path, 'local image missing'))
+
+    return issues
+
+if __name__ == "__main__":
+    # Announce which documentation roots are scanned.
+    scanned = ", ".join(f"{lang} ({doc_dir})" for lang, doc_dir in DOCS_DIRS.items())
+    print("🔍 Scanning markdown files for images in: " + scanned)
+
+    md_files = find_markdown_files(DOCS_DIRS)
+    if not md_files:
+        # Nothing to validate is not treated as a failure.
+        print("⚠️ No Markdown files found in specified directories.")
+        sys.exit(0)
+
+    issues = validate_image_paths(md_files, check_external=True)
+
+    print("\n🔎 Validation Results:")
+
+    if not issues:
+        print("\n✅ All referenced images (local & external) exist and are valid.")
+        print("\n✅ Validation successful: no issues found.\n")
+        sys.exit(0)
+
+    # At least one image failed: list every failure and exit non-zero.
+    print("\n❌ Image issues found:")
+    for lang, md_file, line_num, img_path, description in issues:
+        print(f"  [{lang}] In file '{md_file}' at line {line_num}: Image '{img_path}' failed ({description})")
+    print(f"\n❌ Validation failed: {len(issues)} issue(s) found.\n")
+    sys.exit(1)
@@ -0,0 +1,148 @@
+import re
+import os
+import sys
+import requests
+import time
+
+# Documentation roots to scan, keyed by language label. The paths are
+# relative, so they resolve against the current working directory.
+DOCS_DIRS = {
+    'de': os.path.join('docs', 'de'),
+    'en': os.path.join('docs', 'en'),
+}
+
+# Timeout in seconds passed to every request made for an external link.
+HTTP_TIMEOUT = 3
+
+# Retry settings
+MAX_RETRIES = 3  # total number of attempts made per URL
+RETRY_DELAY = 3  # seconds to pause after an attempt raised an exception
+
+# Browser-like request headers, installed on the requests.Session used by
+# validate_external_link (the original author added them to get past Cloudflare).
+# NOTE(review): 'Accept-Encoding' advertises 'br'; presumably requests can only
+# decode Brotli bodies when an optional brotli package is installed - confirm.
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+    'Accept-Encoding': 'gzip, deflate, br',
+    'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
+    'Connection': 'keep-alive',
+    'Upgrade-Insecure-Requests': '1',
+    'DNT': '1',  # "Do Not Track" request header
+    'Referer': 'https://www.google.com/',  # generic referer value
+    'Cache-Control': 'max-age=0',
+}
+
+def find_markdown_files(base_dirs):
+    """Gather all Markdown files found under the configured directories.
+
+    Args:
+        base_dirs: Mapping of language label to the directory to scan.
+
+    Returns:
+        A list of ``(lang, file_path)`` tuples in ``os.walk`` order. A
+        directory that does not exist only triggers a warning and is skipped.
+    """
+    results = []
+    for lang, base_dir in base_dirs.items():
+        if os.path.exists(base_dir):
+            for folder, _subfolders, names in os.walk(base_dir):
+                results.extend(
+                    (lang, os.path.join(folder, name))
+                    for name in names
+                    if name.endswith(".md")
+                )
+        else:
+            print(f"⚠️ Warning: Directory '{base_dir}' [{lang}] does not exist. Skipping.")
+    return results
+
+def extract_links_with_line_numbers(md_file):
+    """Return the external links and ``[#anchor]`` references of a file.
+
+    Two forms are recognised on each line:
+
+    * ``[text](http(s)://url)``, optionally followed by a quoted title. The
+      URL may contain one level of balanced parentheses.
+    * ``[#anchor]``, returned with an empty link text and a ``#`` prefix.
+
+    Compared with the previous pattern, adjacent links that are not separated
+    by whitespace are no longer merged into one URL, links carrying a title
+    are no longer skipped, and several ``[#anchor]`` references on one line
+    are returned individually.
+
+    NOTE(review): relative links such as ``[text](page.md)`` and
+    ``[text](#anchor)`` are not extracted, so they are never validated.
+
+    Args:
+        md_file: Path of the Markdown file to scan (read as UTF-8).
+
+    Returns:
+        A list of ``(line_number, link_text, target)`` tuples, line numbers
+        starting at 1, in order of appearance.
+    """
+    pattern = re.compile(
+        # [text](url ...): the URL stops at whitespace or an unbalanced ")".
+        r'\[([^\]]+)\]\(\s*(https?://(?:[^\s()]|\([^\s()]*\))+)'
+        # Optional quoted title before the closing parenthesis.
+        r'''(?:\s+(?:"[^"]*"|'[^']*'))?\s*\)'''
+        # [#anchor]: the name ends at the first "]".
+        r'|\[#([^\]]+)\]',
+        re.IGNORECASE,
+    )
+    links_with_lines = []
+
+    with open(md_file, 'r', encoding='utf-8') as f:
+        for line_num, line in enumerate(f, 1):
+            for link_text, url, anchor in pattern.findall(line):
+                if url:
+                    # External link
+                    links_with_lines.append((line_num, link_text, url))
+                elif anchor:
+                    # Internal anchor reference
+                    links_with_lines.append((line_num, link_text, f'#{anchor}'))
+
+    return links_with_lines
+
+def validate_external_link(path):
+    """Check that an external URL answers with a non-error status.
+
+    The URL is requested with a streamed GET, so only the status line and the
+    headers are fetched. On HTTP 400 or 403 the request is repeated once as a
+    regular GET and the start of its body is logged for debugging. Requests
+    that raise are retried up to ``MAX_RETRIES`` attempts in total, pausing
+    ``RETRY_DELAY`` seconds between attempts (not after the last one).
+
+    Args:
+        path: The absolute http(s) URL to check.
+
+    Returns:
+        ``(None, final_url)`` on success, otherwise ``(description, None)``.
+        The description is ``"HTTP <status>"`` for an error status,
+        ``"timeout"`` when the last attempt timed out, or
+        ``"request failed: <ExceptionName>"`` for any other request error.
+    """
+    last_error = 'timeout'
+
+    # The session carries the custom headers; the context manager closes its
+    # connections when the check is finished.
+    with requests.Session() as session:
+        session.headers.update(HEADERS)
+
+        for attempt in range(MAX_RETRIES):
+            try:
+                # Closing the streamed response releases the connection
+                # without downloading the body.
+                with session.get(path, timeout=HTTP_TIMEOUT, allow_redirects=True, stream=True) as resp:
+                    status = resp.status_code
+                    final_url = resp.url
+
+                # Log only errors (status codes >= 400, which includes 429)
+                if status >= 400:
+                    print(f"⚠️ Error while accessing {path}: HTTP {status}")
+                    if status in (400, 403):
+                        print(f"⚠️ Streamed GET request for {path} failed with status {status}. Trying a full GET request...")
+                        with session.get(path, timeout=HTTP_TIMEOUT, allow_redirects=True) as retry_resp:
+                            status = retry_resp.status_code
+                            final_url = retry_resp.url
+                            # Log the start of the body for debugging
+                            print(f"GET Response: {status} - {retry_resp.text[:300]}")
+
+                if status == 429:
+                    print(f"⚠️ Rate-limited while accessing {path}. Skipping retries due to CI runner...")
+                    return (f"HTTP {status}", None)
+
+                if status >= 400:
+                    return (f"HTTP {status}", None)
+
+                # Successful responses are not logged.
+                return (None, final_url)
+
+            except requests.exceptions.Timeout:
+                last_error = 'timeout'
+                print(f"⚠️ Timeout while accessing {path}. Retrying ({attempt + 1}/{MAX_RETRIES})...")
+            except requests.exceptions.RequestException as e:
+                # Keep the real failure kind instead of reporting a timeout.
+                last_error = f"request failed: {type(e).__name__}"
+                print(f"⚠️ Error while accessing {path}: {str(e)}. Retrying ({attempt + 1}/{MAX_RETRIES})...")
+
+            # Waiting is only useful when another attempt follows.
+            if attempt + 1 < MAX_RETRIES:
+                time.sleep(RETRY_DELAY)
+
+    return (last_error, None)
+
+def validate_links(md_files):
+    """Validate the links extracted from the given Markdown files.
+
+    * ``http://`` / ``https://`` targets are requested over the network; each
+      distinct URL is checked only once and the result is reused.
+    * ``#anchor`` targets must occur somewhere in the text of the same file;
+      the file is read at most once, and only if it has such a target.
+    * Any other target is resolved relative to the Markdown file and must
+      exist on disk.
+
+    Args:
+        md_files: Iterable of ``(lang, md_file_path)`` tuples.
+
+    Returns:
+        A list of ``(lang, md_file, line_num, path, description)`` tuples, one
+        per failing link.
+    """
+    issues = []
+    external_results = {}  # URL -> error description (None when reachable)
+
+    for lang, md_file in md_files:
+        file_content = None  # loaded lazily for the first anchor of this file
+
+        for line_num, _link_text, path in extract_links_with_line_numbers(md_file):
+            if path.startswith(("http://", "https://")):
+                if path not in external_results:
+                    error_desc, _ = validate_external_link(path)
+                    external_results[path] = error_desc
+                error_desc = external_results[path]
+                if error_desc:
+                    issues.append((lang, md_file, line_num, path, error_desc))
+            elif path.startswith("#"):
+                if file_content is None:
+                    with open(md_file, 'r', encoding='utf-8') as f:
+                        file_content = f.read()
+                # NOTE(review): the reference itself was found in this file, so
+                # this substring test looks like it can never fail - confirm
+                # how anchor targets are declared and match against those.
+                if path not in file_content:
+                    issues.append((lang, md_file, line_num, path, 'anchor link missing'))
+            else:
+                abs_path = os.path.normpath(os.path.join(os.path.dirname(md_file), path))
+                if not os.path.exists(abs_path):
+                    issues.append((lang, md_file, line_num, path, 'internal link missing'))
+
+    return issues
+
+if __name__ == "__main__":
+    # Announce which documentation roots are scanned.
+    scanned = ", ".join(f"{lang} ({doc_dir})" for lang, doc_dir in DOCS_DIRS.items())
+    print("🔍 Scanning markdown files for internal and external links in: " + scanned)
+
+    md_files = find_markdown_files(DOCS_DIRS)
+    if not md_files:
+        # Nothing to validate is not treated as a failure.
+        print("⚠️ No Markdown files found in specified directories.")
+        sys.exit(0)
+
+    issues = validate_links(md_files)
+
+    print("\n🔎 Validation Results:")
+
+    if not issues:
+        print("\n✅ All internal and external links are valid.")
+        print("\n✅ Validation successful: no issues found.\n")
+        sys.exit(0)
+
+    # At least one link failed: list every failure and exit non-zero.
+    print("\n❌ Link issues found:")
+    for lang, md_file, line_num, link_path, description in issues:
+        print(f"  [{lang}] In file '{md_file}' at line {line_num}: Link '{link_path}' failed ({description})")
+    print(f"\n❌ Validation failed: {len(issues)} issue(s) found.\n")
+    sys.exit(1)
@@ -3,37 +3,55 @@ name: CI
 on:
   push:
     branches:
-    - master
+      - master
+  pull_request:
+    branches:
+      - master
+    types: [opened, synchronize, reopened]
 
 jobs:
-  build-deploy:
+  build-and-deploy-documentation:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
       with:
         fetch-depth: 0
+
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.11
-    - name: Caching dependencies
+
+    - name: Cache Python dependencies
       uses: actions/cache@v3
       with:
         key: ${{ github.ref }}
         path: .cache
-    - name: Install dependencies
+
+    - name: Install Python dependencies
       run: |
         python -m pip install --upgrade pip
         pip install -r requirements.txt
-    - name: Build English
+        pip install requests
+
+    # Both validation scripts exit with status 1 when they report an issue,
+    # so a failing check stops the job before the documentation is built.
+    # NOTE(review): the scripts send live HTTP requests to external sites, so
+    # the outcome can depend on network conditions - confirm that this is
+    # acceptable for a gating step.
+    - name: Validate internal links in Markdown
+      run: python .github/scripts/validate_internal_links.py
+
+    - name: Validate image links in Markdown
+      run: python .github/scripts/validate_images.py
+
+    - name: Build English Documentation
       run: mkdocs build -f config/en/mkdocs.yml
-    - name: Build German
+
+    - name: Build German Documentation
       run: mkdocs build -f config/de/mkdocs.yml
+
     - name: Copy common root files (including legacy redirects)
       run: cp -R docs/root/* site
-    - name: Deploy
+
+    - name: Deploy to GitHub Pages
+      # Publish only when the workflow runs for the master ref and every
+      # earlier step (validation and both builds) has succeeded.
+      if: github.ref == 'refs/heads/master' && success()
       uses: peaceiris/actions-gh-pages@v3
-      if: success()
       with:
         github_token: ${{ secrets.GITHUB_TOKEN }}
         publish_dir: ./site
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>portfolio-help</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+	</buildSpec>
+	<natures>
+	</natures>
+</projectDescription>