Skip to content

Commit 4223139

Browse files
committed
refactor: improve download streaming and progress bar logic by extracting helper functions and enhancing readability.
1 parent e8dfeb4 commit 4223139

5 files changed

Lines changed: 82 additions & 38 deletions

File tree

comexdown/__init__.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@ def get_year(path: Path, year: int, exp=False, imp=False, mun=False):
3131

3232
for direction in directions:
3333
url = urls.trade(direction=direction, year=year, mun=mun)
34-
file_path = fs.path_trade(root=path, direction=direction, year=year, mun=mun)
34+
file_path = fs.path_trade(
35+
root=path,
36+
direction=direction,
37+
year=year,
38+
mun=mun,
39+
)
3540
download.download_file(url, file_path)
3641

3742

@@ -57,7 +62,11 @@ def get_year_nbm(path: Path, year: int, exp=False, imp=False):
5762

5863
for direction in directions:
5964
url = urls.trade(direction=direction, year=year, nbm=True)
60-
file_path = fs.path_trade_nbm(root=path, direction=direction, year=year)
65+
file_path = fs.path_trade_nbm(
66+
root=path,
67+
direction=direction,
68+
year=year,
69+
)
6170
download.download_file(url, file_path)
6271

6372

@@ -84,17 +93,22 @@ def get_complete(path: Path, exp=False, imp=False, mun=False):
8493
for direction in directions:
8594
url = urls.complete(direction=direction, mun=mun)
8695
# Note: 'complete' files might have different naming conventions
87-
# The original code relied on download.exp_complete which hardcoded the filename.
88-
# fs.path_trade generates paths like .../exp/EXP_2020.csv, which isn't right for complete zip files.
96+
# The original code relied on download.exp_complete which hardcoded the
97+
# filename.
98+
# fs.path_trade generates paths like .../exp/EXP_2020.csv, which isn't
99+
# right for complete zip files.
89100
# We need to handle the output path for complete files.
90101
# The original code did: path / filename (where filename is separate).
91102
# We need to replicate that logic or add it to fs.py.
92-
# Let's simple determine the filename from the URL for now as the original did.
103+
# Let's simply determine the filename from the URL for now as the
104+
# original did.
93105
filename = url.split("/")[-1]
94106
file_path = path / filename
95107

96-
# Original code for complete files saved directly to `path` (or `path` was a directory).
97-
# The original implementation for complete files: `filepath = path / filename`.
108+
# Original code for complete files saved directly to `path` (or `path`
109+
# was a directory).
110+
# The original implementation for complete files:
111+
# `filepath = path / filename`.
98112
# So we expect `path` to be a directory.
99113
download.download_file(url, file_path)
100114

comexdown/download.py

Lines changed: 53 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
1111

1212

13-
def is_more_recent(headers: dict, dest: Path) -> bool:
13+
def remote_is_more_recent(headers: dict, dest: Path) -> bool:
1414
"""Check if the remote file is more recent than the local file."""
1515
if not dest.exists():
1616
return False
@@ -27,6 +27,42 @@ def is_more_recent(headers: dict, dest: Path) -> bool:
2727
return False
2828

2929

30+
def _print_progress(
31+
downloaded: int,
32+
total: int,
33+
width: int = 50,
34+
) -> None:
35+
if not total:
36+
return
37+
38+
p = downloaded / total
39+
filled = int(p * width)
40+
bar = "=" * filled + "-" * (width - filled)
41+
size_mb = downloaded / (1024 * 1024)
42+
msg = f"\r[{bar}] {p:.1%} ({size_mb:.2f} MiB)"
43+
sys.stdout.write(msg)
44+
sys.stdout.flush()
45+
46+
47+
def _download_stream(
    response: requests.Response,
    output: Path,
    blocksize: int,
) -> None:
    """Stream the body of an HTTP response into a file, showing progress.

    Args:
        response: An open, streaming response whose body has not been
            consumed yet.
        output: Destination file; it is opened in binary write mode and
            therefore truncated if it already exists.
        blocksize: Chunk size, in bytes, requested from the response.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` when the
            response carries an error status.
    """
    response.raise_for_status()

    # A missing or malformed Content-Length must not abort the download:
    # a ValueError raised here would bypass callers that only handle
    # requests.RequestException. Fall back to "unknown size" (0), which
    # simply disables the progress bar.
    try:
        total_length = int(response.headers.get("content-length", 0))
    except (TypeError, ValueError):
        total_length = 0

    downloaded_size = 0
    with open(output, "wb") as f:
        for chunk in response.iter_content(chunk_size=blocksize):
            if not chunk:
                # Skip empty chunks.
                continue
            f.write(chunk)
            downloaded_size += len(chunk)
            _print_progress(downloaded_size, total_length)

    # Terminate the carriage-return progress line.
    sys.stdout.write("\n")
64+
65+
3066
def download_file(
3167
url: str,
3268
output: Path,
@@ -48,7 +84,11 @@ def download_file(
4884
The path to the downloaded file.
4985
"""
5086
headers = {
51-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
87+
"User-Agent": (
88+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
89+
"AppleWebKit/537.36 (KHTML, like Gecko) "
90+
"Chrome/142.0.0.0 Safari/537.36"
91+
),
5292
}
5393

5494
# Ensure parent directory exists
@@ -64,38 +104,23 @@ def download_file(
64104
url, headers=headers, timeout=10, verify=verify_ssl
65105
)
66106

67-
if output.exists() and not is_more_recent(head_resp.headers, output):
68-
sys.stdout.write(f" {output.name} is up to date.\n")
107+
# Check if local file is up to date (i.e. remote is NOT newer)
108+
cond = remote_is_more_recent(head_resp.headers, output)
109+
if output.exists() and not cond:
110+
sys.stdout.write(f"{output.name} is up to date.\n")
69111
sys.stdout.flush()
70112
return output
71113

72114
# Perform the specific download
73115
with requests.get(
74-
url, headers=headers, stream=True, timeout=30, verify=verify_ssl
116+
url,
117+
headers=headers,
118+
stream=True,
119+
timeout=30,
120+
verify=verify_ssl,
75121
) as r:
76-
r.raise_for_status()
77-
total_length = int(r.headers.get("content-length", 0))
78-
79-
downloaded_size = 0
80-
with open(output, "wb") as f:
81-
for chunk in r.iter_content(chunk_size=blocksize):
82-
if chunk:
83-
f.write(chunk)
84-
downloaded_size += len(chunk)
85-
86-
# Simple progress bar
87-
if total_length:
88-
percent = downloaded_size / total_length
89-
bar_length = 50
90-
filled = int(percent * bar_length)
91-
bar = "=" * filled + "-" * (bar_length - filled)
92-
size_mb = downloaded_size / (1024 * 1024)
93-
sys.stdout.write(
94-
f"\r[{bar}] {percent:.1%} ({size_mb:.2f} MiB)"
95-
)
96-
sys.stdout.flush()
97-
98-
sys.stdout.write("\n")
122+
_download_stream(r, output, blocksize)
123+
99124
return output
100125

101126
except requests.RequestException as e:

comexdown/urls.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@ def table(table_name: str) -> str:
99
return f"{BASE_URL}tabelas/{tables.AUX_TABLES[table_name]}"
1010

1111

12-
def trade(direction: str, year: int, mun: bool = False, nbm: bool = False) -> str:
12+
def trade(
13+
direction: str,
14+
year: int,
15+
mun: bool = False,
16+
nbm: bool = False,
17+
) -> str:
1318
"""
1419
Generates URL for trade data.
1520
direction: 'exp' or 'imp'

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ dependencies = [
2525
"requests>=2.32.5",
2626
]
2727
optional-dependencies = { dev = [
28-
"coveralls",
2928
"pytest",
3029
"pytest-cov",
3130
"pylint",

tests/test_download.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def test_download_file(self, mock_open, mock_requests, mock_sys):
2222
mock_head.headers = {}
2323
mock_requests.head.return_value = mock_head
2424

25-
download.download_file("http://www.example.com/file.csv", Path("data/file.csv"))
25+
download.download_file(
26+
"http://www.example.com/file.csv", Path("data/file.csv"))
2627

2728
mock_requests.get.assert_called()
2829
mock_open.assert_called_with(Path("data/file.csv"), "wb")

0 commit comments

Comments
 (0)