22set -e
33
44python3 - << 'EOF '
5+ """
6+ MCP TOOLBOX: SOURCE PAGE LINTER
7+ This script enforces a standardized structure for integration Source pages
8+ (_index.md files). It ensures users can predictably find
9+ information across all database integrations.
10+
11+ MAINTENANCE GUIDE:
12+ ------------------
13+ 1. TO ADD A NEW HEADING:
14+ Add the exact heading text to the 'ALLOWED_ORDER' list in the desired
15+ sequence.
16+
17+ 2. TO MAKE A HEADING MANDATORY:
18+ Add the heading text to the 'REQUIRED' set.
19+
20+ 3. TO IGNORE NEW CONTENT TYPES:
21+ Update the regex used to build the 'clean_body' variable (Step 3) so the
22+ new content is stripped from the Markdown body before headings are linted.
23+
24+ 4. SCOPE:
25+ This script skips the top-level integrations/_index.md and lints every
26+ _index.md found beneath it (e.g. integrations/{provider}/_index.md).
27+ """
28+
529import os
630import re
731import sys
832from pathlib import Path
933
10- integration_dir = Path("./docs/en/integrations")
11- if not integration_dir.exists():
12- print("Info: Directory './docs/en/integrations' not found. Skipping linting.")
13- sys.exit(0)
14-
34+ # --- CONFIGURATION ---
1535ALLOWED_ORDER = [
1636 "About",
1737 "Available Tools",
@@ -23,95 +43,78 @@ ALLOWED_ORDER = [
2343 "Additional Resources"
2444]
2545REQUIRED = {"About", "Example", "Reference"}
26-
27- # Regex to catch any variation of the list-tools shortcode, including parameters
2846SHORTCODE_PATTERN = r"\{\{<\s*list-tools.*?>\}\}"
47+ # ---------------------
48+
49+ integration_dir = Path("./docs/en/integrations")
50+ if not integration_dir.exists():
51+ print("Info: Directory './docs/en/integrations' not found. Skipping linting.")
52+ sys.exit(0)
2953
3054has_errors = False
3155
32- # Find all _index.md files inside the subdirectories of integrations/
3356for filepath in integration_dir.rglob("_index.md"):
34- # Skip the top-level integrations/_index.md if it exists
3557 if filepath.parent == integration_dir:
3658 continue
3759
3860 with open(filepath, "r", encoding="utf-8") as f:
3961 content = f.read()
4062
41- # Separate YAML frontmatter from the markdown body
4263 match = re.match(r'^\s*---\s*\n(.*?)\n---\s*(.*)', content, re.DOTALL)
4364 if match:
44- frontmatter = match.group(1)
45- body = match.group(2)
65+ frontmatter, body = match.group(1), match.group(2)
4666 else:
47- frontmatter = ""
48- body = content
67+ frontmatter, body = "", content
4968
50- # If the file has no markdown content (metadata placeholder only), skip it entirely
5169 if not body.strip():
5270 continue
5371
5472 file_errors = False
5573
56- # 1. Check Frontmatter Title
57- title_source = frontmatter if frontmatter else content
58- title_match = re.search(r"^title:\s*[\"']?(.*?)[\"']?\s*$", title_source, re.MULTILINE)
74+ # 1. Frontmatter Title Check
75+ title_match = re.search(r"^title:\s*[\"']?(.*?)[\"']?\s*$", frontmatter if frontmatter else content, re.MULTILINE)
5976 if not title_match or not title_match.group(1).strip().endswith("Source"):
60- found_title = title_match.group(1) if title_match else "None"
61- print(f"[{filepath}] Error: Frontmatter title must end with 'Source'. Found: '{found_title}'")
77+ print(f"[{filepath}] Error: Title must end with 'Source'.")
6278 file_errors = True
6379
64- # 2. Check Shortcode Placement ONLY IF "Available Tools" heading is present
65- tools_section_match = re.search(r"^##\s+Available Tools\s*(.*?)(?=^##\s|\Z)", body, re.MULTILINE | re.DOTALL)
66- if tools_section_match:
67- if not re.search(SHORTCODE_PATTERN, tools_section_match.group(1)):
68- print(f"[{filepath}] Error: The list-tools shortcode must be placed under the '## Available Tools' heading.")
69- file_errors = True
70- else:
71- # Prevent edge case where shortcode is used but the heading was forgotten
72- if re.search(SHORTCODE_PATTERN, body):
73- print(f"[{filepath}] Error: A list-tools shortcode was found, but the '## Available Tools' heading is missing.")
80+ # 2. Shortcode Placement Check: the list-tools shortcode must sit under '## Available Tools'. Braces are quadrupled in the f-strings below so the message prints a literal {{< list-tools >}}.
81+ tools_section = re.search(r"^##\s+Available Tools\s*(.*?)(?=^##\s|\Z)", body, re.MULTILINE | re.DOTALL)
82+ if tools_section:
83+ if not re.search(SHORTCODE_PATTERN, tools_section.group(1)):
84+ print(f"[{filepath}] Error: {{{{< list-tools >}}}} must be under '## Available Tools'.")
7485 file_errors = True
86+ elif re.search(SHORTCODE_PATTERN, body):
87+ print(f"[{filepath}] Error: {{{{< list-tools >}}}} found, but '## Available Tools' heading is missing.")
88+ file_errors = True
7589
76- # 3. Strip code blocks from body to avoid linting example markdown headings
90+ # 3. Heading Linting (Stripping code blocks first)
7791 clean_body = re.sub(r"```.*?```", "", body, flags=re.DOTALL)
7892
79- # 4. Check H1 Headings
8093 if re.search(r"^#\s+\w+", clean_body, re.MULTILINE):
81- print(f"[{filepath}] Error: H1 headings (#) are forbidden in the body.")
94+ print(f"[{filepath}] Error: H1 (#) headings are forbidden in the body.")
8295 file_errors = True
8396
84- # 5. Check H2 Headings
85- h2s = re.findall(r"^##\s+(.*)", clean_body, re.MULTILINE)
86- h2s = [h2.strip() for h2 in h2s]
97+ h2s = [h.strip() for h in re.findall(r"^##\s+(.*)", clean_body, re.MULTILINE)]
8798
88- # Missing Required
89- missing = REQUIRED - set(h2s)
90- if missing:
91- print(f"[{filepath}] Error: Missing required H2 headings: {missing}")
99+ # 4. Required & Unauthorized Check
100+ if missing := (REQUIRED - set(h2s)):
101+ print(f"[{filepath}] Error: Missing required H2s: {missing}")
92102 file_errors = True
93103
94- # Unauthorized Headings
95- unauthorized = set(h2s) - set(ALLOWED_ORDER)
96- if unauthorized:
97- print(f"[{filepath}] Error: Unauthorized H2 headings found: {unauthorized}")
104+ if unauthorized := (set(h2s) - set(ALLOWED_ORDER)):
105+ print(f"[{filepath}] Error: Unauthorized H2s found: {unauthorized}")
98106 file_errors = True
99107
100- # Strict Ordering
101- filtered_h2s = [h for h in h2s if h in ALLOWED_ORDER]
102- expected_order = [h for h in ALLOWED_ORDER if h in h2s]
103- if filtered_h2s != expected_order:
104- print(f"[{filepath}] Error: Headings are out of order.")
105- print(f" Expected: {expected_order}")
106- print(f" Found: {filtered_h2s}")
108+ # 5. Order Check
109+ if [h for h in h2s if h in ALLOWED_ORDER] != [h for h in ALLOWED_ORDER if h in h2s]:
110+ print(f"[{filepath}] Error: Headings out of order. Reference: {ALLOWED_ORDER}")
107111 file_errors = True
108112
109- if file_errors:
110- has_errors = True
113+ if file_errors: has_errors = True
111114
112115if has_errors:
113- print("Linting failed. Please fix the structure errors above.")
116+ print("Linting failed. Fix structure errors above.")
114117 sys.exit(1)
115- else:
116- print("Success: All Source pages passed structure validation." )
118+ print("Success: Source pages validated.")
119+ sys.exit(0 )
117120EOF
0 commit comments