Skip to content

Commit 4944878

Browse files
committed
Added attempts to zenodo download
1 parent 43db577 commit 4944878

2 files changed

Lines changed: 34 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ and this project adheres to [Semantic Versioning][].
1111
## 2.1.3
1212

1313
### Changes
14-
- `pp.adjmat` now returns the same features as used as input instead of the subset of `net`.
15-
- `pp.pseudobulk` now returns the same order features as used as input instead of shuffling them.
14+
- `pp.adjmat` now returns the same features as used as input instead of the subset of `net`
15+
- `pp.pseudobulk` now returns features in the same order as the input instead of shuffling them
16+
- Added 5 attempts to `_download` to mitigate 429 Client Error from Zenodo downloads
1617

1718
## 2.1.2
1819

src/decoupler/_download.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33
import pandas as pd
44
import requests
55
from tqdm import tqdm
6+
import time
67

78
from decoupler._log import _log
89

910
URL_DBS = "https://omnipathdb.org/annotations?databases="
1011
URL_INT = "https://omnipathdb.org/interactions/?genesymbols=1&"
1112

1213

13-
def _download(
14+
def _download_chunks(
1415
url: str,
1516
verbose: bool = False,
1617
) -> io.BytesIO:
@@ -33,6 +34,35 @@ def _download(
3334
return data
3435

3536

37+
def _download(
    url: str,
    verbose: bool = False,
    retries: int = 5,
    wait_time: int = 20,
) -> io.BytesIO:
    """
    Download ``url``, retrying when the server answers 429 Too Many Requests.

    Parameters
    ----------
    url
        Address handed to ``_download_chunks``.
    verbose
        Forwarded to ``_log`` for the start, retry and finish messages.
    retries
        Maximum number of download attempts. Values below 1 are treated as 1,
        so at least one attempt is always made.
    wait_time
        Seconds to sleep after a 429 response before the next attempt.

    Returns
    -------
    The buffer returned by the first successful ``_download_chunks`` call.

    Raises
    ------
    requests.exceptions.HTTPError
        If the HTTP error is not a 429, or if the final attempt also fails
        with a 429.
    """
    # Always try at least once: with a non-positive ``retries`` the loop would
    # be skipped entirely and ``None`` returned as if the download succeeded.
    max_attempts = max(1, retries)
    _log(f"Downloading {url}", level="info", verbose=verbose)
    for attempt in range(1, max_attempts + 1):
        try:
            # The chunked download is called with verbose=False; this wrapper
            # emits the start/finish messages itself.
            data = _download_chunks(url, verbose=False)
        except requests.exceptions.HTTPError as e:
            status_code = e.response.status_code if e.response is not None else None
            if status_code != 429 or attempt >= max_attempts:
                raise  # Not a 429 or no attempts left: re-raise
            _log(
                f"429 Too Many Requests for {url}. Retrying in {wait_time}s (attempt {attempt + 1}/{retries})",
                level="warn",
                verbose=verbose,
            )
            time.sleep(wait_time)
        else:
            break
    _log("Download finished", level="info", verbose=verbose)
    return data
64+
65+
3666
def _bytes_to_pandas(data: io.BytesIO, **kwargs) -> pd.DataFrame:
    """
    Parse a downloaded byte buffer as delimited text.

    Parameters
    ----------
    data
        Buffer handed to ``pandas.read_csv``.
    **kwargs
        Extra keyword arguments passed through to ``pandas.read_csv``.

    Returns
    -------
    The ``pandas.DataFrame`` produced by ``pandas.read_csv``.
    """
    return pd.read_csv(data, **kwargs)

0 commit comments

Comments
 (0)