@@ -19,6 +19,67 @@ def escape_special_chars(text, tool=None):
1919 return text
2020
2121
def _sanitize_download_name(candidate):
    """Reduce a server-supplied name to a bare file name, or '' if unusable.

    The value comes from an untrusted HTTP header or URL, so directory
    components are dropped for both '/' and '\\' separators and the special
    names '.' and '..' are rejected.
    """
    import os

    # os.path.basename on POSIX does not treat '\\' as a separator, which
    # would let a name such as '..\\..\\evil.sh' through unchanged.
    name = os.path.basename(candidate.replace('\\', '/'))
    if name in ('.', '..'):
        return ''
    return name


def _filename_from_content_disposition(cd):
    """Extract the file name from a Content-Disposition value, or ''.

    The RFC 5987 extended form (``filename*=charset'lang'value``) takes
    precedence over the plain ``filename=`` form (quoted or bare).
    """
    import re
    from urllib.parse import unquote

    # Extended form: capture the charset so the percent-encoded bytes are
    # decoded with the encoding the server declared; the language tag
    # between the two single quotes is optional and ignored.
    m = re.search(
        r"filename\*\s*=\s*\"?([^';\s\"]*)'[^';]*'([^;\r\n]+)",
        cd, re.IGNORECASE)
    if m:
        raw = m.group(2).strip().strip('"')
        try:
            decoded = unquote(raw, encoding=m.group(1) or 'utf-8')
        except LookupError:
            # Unknown charset label: fall back to UTF-8 decoding.
            decoded = unquote(raw)
        name = _sanitize_download_name(decoded)
        if name:
            return name

    # Then the plain filename= form (quoted or bare).
    m = re.search(r'filename\s*=\s*"([^"]+)"', cd, re.IGNORECASE)
    if not m:
        m = re.search(r'filename\s*=\s*([^;\r\n]+)', cd, re.IGNORECASE)
    if m:
        return _sanitize_download_name(m.group(1).strip().strip('"'))
    return ''


def get_filename_from_content_disposition(url, verify_ssl, logger=None):
    """Browser-like filename resolution.

    Performs a lightweight HTTP request and inspects the server's
    ``Content-Disposition`` header (honouring the RFC 5987 ``filename*`` form,
    including its declared charset) to determine the name a browser would
    save the file as. Falls back to the basename of the final (post-redirect)
    URL path when that basename contains a dot.

    Args:
        url: URL to query.
        verify_ssl: when falsy, TLS certificate and hostname verification
            are disabled for this request.
        logger: optional logger; its ``warning`` method is called when the
            lookup fails.

    Returns:
        The resolved file name (never containing directory components and
        never '.' or '..'), or ``None`` when it cannot be determined.
        Best-effort: any error results in ``None`` so the caller can fall
        back to URL-based naming.
    """
    try:
        import ssl
        import urllib.request
        from urllib.parse import urlsplit

        ctx = None
        if not verify_ssl:
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE

        # Use GET (many servers reject HEAD); urllib only reads the body
        # lazily, so closing the response avoids downloading the payload.
        req = urllib.request.Request(
            url, headers={'User-Agent': 'Mozilla/5.0'}, method='GET')
        with urllib.request.urlopen(req, timeout=15, context=ctx) as resp:
            cd = resp.headers.get('Content-Disposition', '') or ''
            final_url = resp.geturl()

        if cd:
            name = _filename_from_content_disposition(cd)
            if name:
                return name

        # Fall back to the basename of the final (possibly redirected) URL.
        if final_url:
            tail = _sanitize_download_name(urlsplit(final_url).path)
            if "." in tail:
                return tail
    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller fall
        # back to URL-based naming.
        if logger is not None:
            logger.warning(
                f"Could not resolve filename from headers for {url}: {e}")
    return None
81+
82+
2283def preprocess (i ):
2384
2485 os_info = i ['os_info' ]
@@ -108,18 +169,29 @@ def preprocess(i):
108169 os .chdir (download_path )
109170
110171 if env .get ('MLC_DOWNLOAD_FILENAME' , '' ) == '' :
111- urltail = os .path .basename (env ['MLC_DOWNLOAD_URL' ])
112- urlhead = os .path .dirname (env ['MLC_DOWNLOAD_URL' ])
113- if "." in urltail and "/" in urlhead :
114- # Check if ? after filename
115- j = urltail .find ('?' )
116- if j > 0 :
117- urltail = urltail [:j ]
118- env ['MLC_DOWNLOAD_FILENAME' ] = urltail
119- elif env .get ('MLC_DOWNLOAD_TOOL' , '' ) == "rclone" :
120- env ['MLC_DOWNLOAD_FILENAME' ] = urltail
172+ download_url = env ['MLC_DOWNLOAD_URL' ]
173+ resolved_name = ''
174+ # Browser-like behaviour: ask the server what the file should be
175+ # named (Content-Disposition) before deriving it from the URL.
176+ if download_url .lower ().startswith (('http://' , 'https://' )):
177+ resolved_name = get_filename_from_content_disposition (
178+ download_url , verify_ssl , logger ) or ''
179+ if resolved_name != '' :
180+ env ['MLC_DOWNLOAD_FILENAME' ] = resolved_name
121181 else :
122- env ['MLC_DOWNLOAD_FILENAME' ] = "index.html"
182+ # Fallback: derive the filename from the URL path (basename).
183+ urltail = os .path .basename (download_url )
184+ urlhead = os .path .dirname (download_url )
185+ if "." in urltail and "/" in urlhead :
186+ # Check if ? after filename
187+ j = urltail .find ('?' )
188+ if j > 0 :
189+ urltail = urltail [:j ]
190+ env ['MLC_DOWNLOAD_FILENAME' ] = urltail
191+ elif env .get ('MLC_DOWNLOAD_TOOL' , '' ) == "rclone" :
192+ env ['MLC_DOWNLOAD_FILENAME' ] = urltail
193+ else :
194+ env ['MLC_DOWNLOAD_FILENAME' ] = "index.html"
123195
124196 if tool == "mlcutil" :
125197 mlcutil_require_download = 0
0 commit comments