Skip to content

Commit 58fa1d8

Browse files
feat: add port for the download_manager package as a replacement for Pooch
1 parent a45136b commit 58fa1d8

1 file changed

Lines changed: 131 additions & 3 deletions

File tree

ontograph/downloader.py

Lines changed: 131 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,136 @@ def _get_resource_url(
213213
class DownloadManagerAdapter(DownloaderPort):
    """Alternative downloader implementation.

    Adapter for the `download_manager` package by Saezlab.

    Files are written below ``cache_dir`` and every successful download is
    recorded in an internal ``{resource_id: path}`` registry that
    :meth:`get_paths` exposes.
    """

    def __init__(
        self,
        cache_dir: Path | str,
        *,
        backend: str = 'requests',
        **kwargs: object,
    ) -> None:
        """Initialize the download-manager adapter.

        Args:
            cache_dir: Directory to store downloaded files. Created
                (including parents) if it does not exist yet.
            backend: Backend for download_manager ('requests' or 'curl').
            **kwargs: Extra keyword args forwarded to DownloadManager.

        Raises:
            ModuleNotFoundError: If `download_manager` is not installed.
        """
        # Imported here rather than at module level so that a missing
        # package only fails when this adapter is actually instantiated.
        try:
            import download_manager as dm
        except ModuleNotFoundError as exc:
            raise ModuleNotFoundError(
                'download_manager is not installed. '
                'Install it to use DownloadManagerAdapter.'
            ) from exc

        # Coerce so that plain string paths are accepted as well.
        self._cache_dir = Path(cache_dir)
        self._cache_dir.mkdir(parents=True, exist_ok=True)
        self._manager = dm.DownloadManager(
            path=str(self._cache_dir),
            backend=backend,
            **kwargs,
        )
        self._resources_paths: dict[str, Path] = {}

    def get_paths(self) -> dict[str, Path]:
        """Get paths of all downloaded resources.

        The returned dictionary is the adapter's internal registry itself,
        not a copy.

        Returns:
            dict[str, Path]: dictionary mapping resource IDs to file paths
        """
        return self._resources_paths

    def fetch_from_url(self, url_ontology: str, filename: str | None) -> Path:
        """Download an ontology file from a specified URL.

        The file is requested at ``cache_dir / filename`` and the path
        reported by the download manager is registered under the stem of
        that destination (the filename without its last suffix).

        Args:
            url_ontology: URL pointing to the ontology file
            filename: Name to save the file as

        Returns:
            Path: Path to the downloaded file

        Raises:
            ValueError: If the URL or filename is empty
            OSError: If the download manager returns no file path

        Note:
            Exceptions raised by the download manager itself are not
            caught here and propagate to the caller unchanged.
        """
        self._validate_download_parameters(url_ontology, filename)

        dest = self._cache_dir / filename
        # Lazy %-formatting: the message is only built if it is emitted.
        logging.info('Downloading ontology from %s as %s', url_ontology, dest)
        result_path = self._manager.download(url_ontology, dest=str(dest))
        if not result_path:
            raise OSError('Download manager did not return a file path.')

        result = Path(result_path)
        self._resources_paths[dest.stem] = result
        return result

    def fetch_from_catalog(
        self, resources: list[dict[str, str]], catalog: CatalogOntologies
    ) -> dict[str, Path]:
        """Download multiple ontology files defined in a catalog.

        Each resource is saved as ``<name_id>.<format>`` via
        :meth:`fetch_from_url`. Processing stops at the first resource that
        raises.

        Args:
            resources: list of dictionaries with resource information
            catalog: Catalog object containing download URLs

        Returns:
            dict[str, Path]: dictionary mapping resource IDs to file paths

        Raises:
            ValueError: If the resources list is empty or URL not found
            KeyError: If a resource is missing required fields
        """
        if not resources:
            raise ValueError('Resources list for batch download is empty.')

        results: dict[str, Path] = {}
        for resource in resources:
            name_id, format_type = self._extract_resource_info(resource)
            url = self._get_resource_url(name_id, format_type, catalog)

            filename = f'{name_id}.{format_type}'
            local_path = self.fetch_from_url(
                url_ontology=url, filename=filename
            )
            results[name_id] = local_path

        # fetch_from_url registers each file under its filename stem; also
        # register every file under its explicit resource ID.
        self._resources_paths.update(results)
        return results

    def _validate_download_parameters(
        self, url_ontology: str, filename: str | None
    ) -> None:
        """Reject empty or whitespace-only download parameters.

        Args:
            url_ontology: URL to validate
            filename: Target filename to validate

        Raises:
            ValueError: If the URL or the filename is missing, empty or
                consists only of whitespace
        """
        if not url_ontology or not url_ontology.strip():
            raise ValueError('URL cannot be empty')

        if not filename or not filename.strip():
            raise ValueError('Filename cannot be empty')

    def _extract_resource_info(
        self, resource: dict[str, str]
    ) -> tuple[str, str]:
        """Read the resource ID and format from a resource dictionary.

        Args:
            resource: Dictionary with a 'name_id' entry and an optional
                'format' entry

        Returns:
            tuple[str, str]: The resource ID and its format; the format
                falls back to DEFAULT_FORMAT_ONTOLOGY when the 'format'
                key is absent

        Raises:
            KeyError: If 'name_id' is missing or empty
        """
        name_id = resource.get('name_id')
        if not name_id:
            raise KeyError("Resource dictionary must contain 'name_id' key")

        format_type = resource.get('format', DEFAULT_FORMAT_ONTOLOGY)
        return name_id, format_type

    def _get_resource_url(
        self, name_id: str, format_type: str, catalog: CatalogOntologies
    ) -> str:
        """Look up the download URL of a resource in the catalog.

        Args:
            name_id: Resource ID to look up
            format_type: Requested file format
            catalog: Catalog object queried via `get_download_url`

        Returns:
            str: The download URL reported by the catalog

        Raises:
            ValueError: If the catalog returns no URL for the resource
        """
        url = catalog.get_download_url(name_id, format_type)
        if not url:
            raise ValueError(
                f'Cannot find download URL for ontology {name_id} '
                f'in format {format_type}'
            )
        return url

0 commit comments

Comments
 (0)