Skip to content

Use sessions for downloaders, and allow adding auth headers #97

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions datahugger/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ def main():
"May appear multiple times.",
)

parser.add_argument(
"--auth-token",
help="Add an authentication token to the downloader. This will send an "
"Authorization header with the value of the token "
"(i.e. Authorization Bearer <token>).",
default=None,
)

# version
parser.add_argument(
"-V",
Expand All @@ -119,6 +127,7 @@ def main():
progress=args.progress,
print_only=args.print_only,
params=args.params,
token=args.auth_token,
)

except DOIError as doi_err:
Expand Down
8 changes: 8 additions & 0 deletions datahugger/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def info(
progress=True,
print_only=False,
params=None,
token=None,
):
"""Get info on the content of the dataset.

Expand All @@ -83,6 +84,8 @@ def info(
the actual files (Dry run). Default: False.
params: dict
Extra parameters for the request.
token: str
The authentication token for the service to be sent as an Authorization header.

Returns
-------
Expand All @@ -103,6 +106,7 @@ def info(
progress=progress,
print_only=print_only,
params=params,
token=token,
)


Expand All @@ -116,6 +120,7 @@ def get(
progress=True,
print_only=False,
params=None,
token=None,
):
"""Get the content of repository.

Expand Down Expand Up @@ -145,6 +150,8 @@ def get(
the actual files (Dry run). Default: False.
params: dict
Extra parameters for the request.
token: str
The authentication token for the service to be sent as an Authorization header.

Returns
-------
Expand All @@ -162,6 +169,7 @@ def get(
progress=progress,
print_only=print_only,
params=params,
token=token,
)

return service.download(output_folder)
12 changes: 9 additions & 3 deletions datahugger/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
checksum=False,
print_only=False,
params=None,
token=None,
):
super().__init__()
self.resource = resource
Expand All @@ -67,6 +68,11 @@ def __init__(
self.checksum = checksum
self.print_only = print_only
self.params = params
self.token = token

self.session = requests.Session()
if self.token:
self.session.headers["Authorization"] = f"Bearer {self.token}"

def _get_attr_attr(self, record, jsonp):
try:
Expand Down Expand Up @@ -160,7 +166,7 @@ def download_file(

if not self.print_only:
logging.info(f"Downloading file {file_link}")
res = requests.get(file_link, stream=True)
res = self.session.get(file_link, stream=True)
res.raise_for_status()

output_fp = Path(output_folder, file_name)
Expand Down Expand Up @@ -198,7 +204,7 @@ def _parse_url(self, url):
raise ValueError(f"Failed to parse URL '{url}'") from err

def _unpack_single_folder(self, zip_url, output_folder):
r = requests.get(zip_url)
r = self.session.get(zip_url)
r.raise_for_status()

z = zipfile.ZipFile(io.BytesIO(r.content))
Expand Down Expand Up @@ -286,7 +292,7 @@ def _get_files_recursive(self, url, folder_name=None, base_url=None):
result = []

# get the data from URL
res = requests.get(url)
res = self.session.get(url)
res.raise_for_status()
response = res.json()

Expand Down
Loading