1010 'en' : os .path .join ('docs' , 'en' ),
1111}
1212
13+ # Path to disallowed domains file
14+ DISALLOWED_DOMAINS_FILE = os .path .join ('.github' , 'ci' , 'disallowed_image_domains.txt' )
15+
1316# Timeouts for external image check (in seconds)
1417HTTP_TIMEOUT = 3
1518
3235 'TE' : 'Trailers' ,
3336}
3437
def load_disallowed_domains(file_path):
    """Load disallowed domains from a text file into a set.

    The file is read line by line. Surrounding whitespace is stripped,
    blank lines and lines starting with ``#`` are skipped, and every
    remaining entry is lower-cased.

    Args:
        file_path: Path to the text file listing one domain per line.

    Returns:
        A set of lower-cased entries; an empty set when the file does not
        exist.
    """
    try:
        # 'utf-8-sig' transparently drops a leading byte-order mark, which
        # would otherwise stick to the first entry (or hide a leading '#').
        # It decodes BOM-less UTF-8 files exactly like 'utf-8'.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            entries = (line.strip() for line in f)
            return {
                entry.lower()
                for entry in entries
                if entry and not entry.startswith("#")
            }
    except (FileNotFoundError, NotADirectoryError):
        # A missing list simply means nothing is disallowed. Opening first
        # and handling the error avoids the check-then-open race.
        return set()
48+
def is_disallowed_url(path, disallowed_domains):
    """Return True if the URL points at a disallowed domain.

    The URL's hostname is compared case-insensitively against each entry:
    it matches when the host equals the entry or is a subdomain of it.
    Comparing the host rather than the whole URL avoids false positives
    such as ``notbad.com`` or ``good.com/?u=bad.com`` for the entry
    ``bad.com``.

    Args:
        path: The image URL to check.
        disallowed_domains: Iterable of lower-cased domain names.

    Returns:
        True when the URL's host is disallowed, otherwise False. If no
        host can be parsed from ``path`` (for example a scheme-less
        ``bad.com/x.png``), the check falls back to a plain substring
        match over the whole string.
    """
    # Local import keeps this block self-contained; the module's import
    # section is not visible from here.
    from urllib.parse import urlsplit

    url = path.lower()
    try:
        host = urlsplit(url).hostname
    except ValueError:
        # Malformed netloc (e.g. an unterminated IPv6 literal).
        host = None

    if not host:
        return any(domain in url for domain in disallowed_domains)

    # A fully-qualified host may carry a trailing dot; entries may carry
    # stray leading/trailing dots. Normalise both before comparing.
    host = host.rstrip(".")
    for domain in disallowed_domains:
        domain = domain.strip(".")
        if domain and (host == domain or host.endswith("." + domain)):
            return True
    return False
52+
3553def find_markdown_files (base_dirs ):
3654 """Recursively find all markdown files in given directories."""
3755 md_files = []
@@ -109,7 +127,7 @@ def validate_external_image(path, checked_urls):
109127 checked_urls [path ] = 'timeout'
110128 return 'timeout'
111129
112- def validate_images (md_files ):
130+ def validate_images (md_files , disallowed_domains ):
113131 """Validate all image references in markdown files."""
114132 issues = []
115133 checked_urls = {}
@@ -121,6 +139,12 @@ def validate_images(md_files):
121139 # Checking external image
122140 print (f"🔵 Checking external image { path } " )
123141
142+ if is_disallowed_url (path , disallowed_domains ):
143+ description = 'disallowed domain'
144+ issues .append ((lang , md_file , line_num , path , description ))
145+ print (f"❌ Disallowed domain detected { path } " )
146+ continue
147+
124148 if path in checked_urls :
125149 error_desc = checked_urls [path ]
126150 else :
@@ -141,12 +165,16 @@ def validate_images(md_files):
141165if __name__ == "__main__" :
142166 print ("🔍 Scanning markdown files for image references in: " + ", " .join ([f"{ lang } ({ dir } )" for lang , dir in DOCS_DIRS .items ()]))
143167
168+ disallowed_domains = load_disallowed_domains (DISALLOWED_DOMAINS_FILE )
169+ if disallowed_domains :
170+ print (f"ℹ️ Loaded { len (disallowed_domains )} disallowed domains from { DISALLOWED_DOMAINS_FILE } " )
171+
144172 md_files = find_markdown_files (DOCS_DIRS )
145173 if not md_files :
146174 print ("⚠️ No Markdown files found in specified directories." )
147175 sys .exit (0 )
148176
149- issues = validate_images (md_files )
177+ issues = validate_images (md_files , disallowed_domains )
150178
151179 print ("\n 🔎 Validation Results:" )
152180
0 commit comments