44import hashlib
55import logging
66import asyncio
7+ import socket
8+ import ipaddress
79from pathlib import Path
810from copy import deepcopy
911from random import randint , choice
1012from contextlib import asynccontextmanager
13+ from urllib .parse import urlparse , urljoin
1114
1215import httpx
1316from slugify import slugify
@@ -81,7 +84,13 @@ async def get_redirected_url(url, **kwargs):
8184 The final URL after following redirects, or the original URL if
8285 no redirects are found or an error occurs.
8386 """
87+
8488 kwargs ["follow_redirects" ] = True
89+ event_hooks = kwargs .setdefault ("event_hooks" , {})
90+ response_hooks = list (event_hooks .get ("response" ) or [])
91+ response_hooks .append (_check_redirect_safety )
92+ event_hooks ["response" ] = response_hooks
93+
8594 try :
8695 async with httpx .AsyncClient (** kwargs ) as client :
8796 response = await client .head (url )
@@ -90,6 +99,53 @@ async def get_redirected_url(url, **kwargs):
9099 return url
91100
92101
async def _check_redirect_safety(response):
    """Validate a redirect target before the client follows it.

    Intended for use as an httpx ``response`` event hook. Responses that
    are not redirects, or that carry no ``Location`` header, are ignored.

    Args:
        response: The response object passed to the event hook. Only
            ``is_redirect``, ``headers`` and ``url`` are read.

    Raises:
        ValueError: If the redirect target is rejected by ``_is_safe_url``.
    """
    if not response.is_redirect:
        return

    location = response.headers.get("location")
    if not location:
        return

    # Relative targets are resolved against the URL that issued the redirect.
    if location.startswith(("http://", "https://")):
        target = location
    else:
        target = urljoin(str(response.url), location)

    # _is_safe_url may perform a blocking DNS lookup; run it on the default
    # executor so the event loop is not stalled while the name resolves.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No asyncio loop is running (e.g. a different async backend):
        # fall back to validating inline rather than skipping the check.
        allowed = _is_safe_url(target)
    else:
        allowed = await loop.run_in_executor(None, _is_safe_url, target)

    if not allowed:
        raise ValueError(f"Redirect target is not allowed: {target}")
116+
117+
def _is_safe_url(url):
    """Return whether a URL's host maps only to globally routable addresses.

    The host is taken from ``urlparse(url).hostname``. An IP literal is
    checked directly; any other host is resolved with ``socket.getaddrinfo``
    and *every* returned address (IPv4 and IPv6) must be acceptable, so a
    name that also maps to an internal address is rejected.

    Args:
        url: The URL string to check.

    Returns:
        ``True`` only if a host is present and all of its addresses are
        global and not private, loopback, link-local, reserved, multicast
        or unspecified. ``False`` otherwise, including on any parsing or
        resolution failure; this function never raises.
    """
    try:
        hostname = urlparse(url).hostname
        if not hostname:
            return False

        try:
            addresses = [ipaddress.ip_address(hostname)]
        except ValueError:
            # Not an IP literal: resolve the name and collect all results.
            try:
                infos = socket.getaddrinfo(
                    hostname, None, type=socket.SOCK_STREAM
                )
            except (OSError, UnicodeError):
                # Covers socket.gaierror / socket.herror and hostnames that
                # cannot be encoded for resolution.
                return False
            # sockaddr[0] is the address text; drop any IPv6 "%zone" suffix.
            addresses = [
                ipaddress.ip_address(info[4][0].partition("%")[0])
                for info in infos
            ]

        if not addresses:
            return False

        return all(
            ip.is_global
            and not (
                ip.is_private
                or ip.is_loopback
                or ip.is_link_local
                or ip.is_reserved
                or ip.is_multicast
                or ip.is_unspecified
            )
            for ip in addresses
        )
    except Exception:
        # Deliberately fail closed: anything unexpected means "not safe".
        return False
147+
148+
93149def clean_search_query (query ):
94150 """Check if the first character is a digit and remove it if so.
95151
0 commit comments