1010urllib3 .disable_warnings (urllib3 .exceptions .InsecureRequestWarning )
1111
1212
13- def is_more_recent (headers : dict , dest : Path ) -> bool :
13+ def remote_is_more_recent (headers : dict , dest : Path ) -> bool :
1414 """Check if the remote file is more recent than the local file."""
1515 if not dest .exists ():
1616 return False
@@ -27,6 +27,42 @@ def is_more_recent(headers: dict, dest: Path) -> bool:
2727 return False
2828
2929
def _print_progress(
    downloaded: int,
    total: int,
    width: int = 50,
) -> None:
    """Redraw a one-line text progress bar on stdout.

    The line starts with a carriage return so that successive calls
    overwrite each other instead of scrolling the terminal.

    Args:
        downloaded: Number of bytes received so far.
        total: Expected total number of bytes. When it is falsy or
            negative no ratio can be computed and nothing is printed.
        width: Number of characters used for the bar itself.
    """
    if not total or total < 0:
        return

    # Clamp the ratio to [0, 1]: the byte count can exceed the announced
    # total (e.g. when the announced size does not match the bytes actually
    # yielded), which would otherwise overflow the bar and print >100%.
    ratio = min(max(downloaded / total, 0.0), 1.0)
    filled = int(ratio * width)
    bar = "=" * filled + "-" * (width - filled)
    size_mb = downloaded / (1024 * 1024)
    sys.stdout.write(f"\r[{bar}] {ratio:.1%} ({size_mb:.2f} MiB)")
    sys.stdout.flush()
45+
46+
def _download_stream(
    response: "requests.Response",
    output: Path,
    blocksize: int,
) -> None:
    """Stream the body of *response* into *output*, showing progress.

    The data is first written to a sibling ``<output>.part`` file and only
    moved over *output* once the whole body has been received. An
    interrupted transfer therefore never leaves a truncated file at the
    final path, where it could later be mistaken for a complete download.

    Args:
        response: Streaming HTTP response to read from.
        output: Final destination of the downloaded file.
        blocksize: Chunk size, in bytes, passed to ``iter_content``.

    Raises:
        requests.HTTPError: If the response carries an error status.
        OSError: If the file cannot be written or moved into place.
    """
    # Function-scope import keeps this helper self-contained.
    import os

    response.raise_for_status()

    # A missing or malformed Content-Length only disables the progress bar.
    try:
        total_length = int(response.headers.get("content-length", 0))
    except (TypeError, ValueError):
        total_length = 0

    target = os.fspath(output)
    partial = f"{target}.part"
    downloaded_size = 0

    try:
        with open(partial, "wb") as f:
            for chunk in response.iter_content(chunk_size=blocksize):
                if not chunk:
                    continue
                f.write(chunk)
                downloaded_size += len(chunk)
                # Without a known total there is nothing to draw.
                if total_length:
                    _print_progress(downloaded_size, total_length)
        # Same-directory rename: replaces any previous file in one step.
        os.replace(partial, target)
    except BaseException:
        # Drop the incomplete file, then let the original error propagate.
        try:
            os.remove(partial)
        except OSError:
            pass
        raise

    sys.stdout.write("\n")
64+
65+
3066def download_file (
3167 url : str ,
3268 output : Path ,
@@ -48,7 +84,11 @@ def download_file(
4884 The path to the downloaded file.
4985 """
5086 headers = {
51- "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36" ,
87+ "User-Agent" : (
88+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
89+ "AppleWebKit/537.36 (KHTML, like Gecko) "
90+ "Chrome/142.0.0.0 Safari/537.36"
91+ ),
5292 }
5393
5494 # Ensure parent directory exists
@@ -64,38 +104,23 @@ def download_file(
64104 url , headers = headers , timeout = 10 , verify = verify_ssl
65105 )
66106
67- if output .exists () and not is_more_recent (head_resp .headers , output ):
68- sys .stdout .write (f" { output .name } is up to date.\n " )
107+ # Check if local file is up to date (i.e. remote is NOT newer)
108+ cond = remote_is_more_recent (head_resp .headers , output )
109+ if output .exists () and not cond :
110+ sys .stdout .write (f"{ output .name } is up to date.\n " )
69111 sys .stdout .flush ()
70112 return output
71113
72114 # Perform the specific download
73115 with requests .get (
74- url , headers = headers , stream = True , timeout = 30 , verify = verify_ssl
116+ url ,
117+ headers = headers ,
118+ stream = True ,
119+ timeout = 30 ,
120+ verify = verify_ssl ,
75121 ) as r :
76- r .raise_for_status ()
77- total_length = int (r .headers .get ("content-length" , 0 ))
78-
79- downloaded_size = 0
80- with open (output , "wb" ) as f :
81- for chunk in r .iter_content (chunk_size = blocksize ):
82- if chunk :
83- f .write (chunk )
84- downloaded_size += len (chunk )
85-
86- # Simple progress bar
87- if total_length :
88- percent = downloaded_size / total_length
89- bar_length = 50
90- filled = int (percent * bar_length )
91- bar = "=" * filled + "-" * (bar_length - filled )
92- size_mb = downloaded_size / (1024 * 1024 )
93- sys .stdout .write (
94- f"\r [{ bar } ] { percent :.1%} ({ size_mb :.2f} MiB)"
95- )
96- sys .stdout .flush ()
97-
98- sys .stdout .write ("\n " )
122+ _download_stream (r , output , blocksize )
123+
99124 return output
100125
101126 except requests .RequestException as e :
0 commit comments