Skip to content

Commit a0f95d8

Browse files
authored
Fix issue #8 "can't download pathway archive" (#9)
* Use the new JSON-API #8
* Add docstring
* Add __init__.py to tests
* Add test_download_pathway_archive.py to tests
* Refactor download_pathway_archive.py to use new JSON-API endpoint
1 parent e6676b7 commit a0f95d8

4 files changed

Lines changed: 60 additions & 13 deletions

File tree

pywikipathways/download_pathway_archive.py

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,31 @@
66
from .list_organisms import *
77

88
def download_pathway_archive(date='current', organism=None, format='gpml', destpath='./'):
9+
"""Download Pathway Archive
10+
11+
Access the monthly archives of pathway content from WikiPathways.
12+
13+
If you do not specify an organism, then an archive file will not be downloaded.
14+
Instead, the archive will be opened in a tab in your default browser.
15+
16+
Args:
17+
date (str, optional): The timestamp for a monthly release (e.g., 20171010)
18+
or "current" (default) for the latest release.
19+
organism (str, optional): A particular species. See `listOrganisms`.
20+
format (str, optional): Either "gpml" (default), "gmt", or "svg".
21+
destpath (str, optional): Destination path for the file to be downloaded to.
22+
Default is the current working directory.
23+
24+
Returns:
25+
str: Filename of the downloaded file or an opened tab in the default browser.
26+
27+
Examples:
28+
>>> download_pathway_archive() # open in browser
29+
>>> download_pathway_archive(format="gmt") # open in browser
30+
>>> download_pathway_archive(date="20230710", format="svg") # open in browser
31+
>>> download_pathway_archive(date="20230710", organism="Mus musculus", format="svg") # download file
32+
>>> download_pathway_archive(organism="Mus musculus") # download file
33+
"""
934
# get validated format
1035
if not format in ['gpml', 'gmt', 'svg']:
1136
sys.exit(format + " is not in ['gpml', 'gmt', 'svg']. Please specify one of these.")
@@ -24,18 +49,16 @@ def download_pathway_archive(date='current', organism=None, format='gpml', destp
2449
# download specific file, or...
2550
if organism:
2651
if date == 'current':
27-
curr_files = pandas.read_html("https://wikipathways-data.wmcloud.org/current/" + format)[0]['Filename']
28-
filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))]
29-
filename = list(filename)[0]
30-
if not True in curr_files.str.contains(organism.replace(" ", "_")):
52+
curr_files = pandas.read_html("https://data.wikipathways.org/current/" + format)[0]["File Name"]
53+
filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))].iloc[0]
54+
if len(filename) == 0:
3155
sys.exit('Could not find a file matching your specifications. Try browsing http://data.wikipathways.org.')
3256
else:
33-
if requests.get("https://wikipathways-data.wmcloud.org/" + date).ok:
34-
ext = ".zip"
35-
if format == 'gmt':
36-
ext = ".gmt"
37-
filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
38-
url = "/".join(['http://data.wikipathways.org', date, format, filename])
57+
ext = ".zip"
58+
if format == 'gmt':
59+
ext = ".gmt"
60+
filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
61+
url = "/".join(['https://data.wikipathways.org', date, format, filename])
3962
r = requests.get(url)
4063
file = open(filename, "wb")
4164
file.write(r.content)

pywikipathways/list_organisms.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
1-
from .utilities import *
1+
import requests
22

33
def list_organisms():
4-
res = wikipathways_get('listOrganisms', {'format': 'json'})
5-
return res['organisms']
4+
"""List Organisms.
5+
6+
Retrieve the list of organisms supported by WikiPathways
7+
8+
Returns:
9+
list: A list of organisms
10+
11+
Example:
12+
>>> list_organisms()
13+
"""
14+
res = requests.get("https://www.wikipathways.org/json/listOrganisms.json")
15+
res.raise_for_status()
16+
return res.json()['organisms']

tests/__init__.py

Whitespace-only changes.
tests/test_download_pathway_archive.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import pytest
2+
from pywikipathways.download_pathway_archive import *
3+
4+
def test_successful_download():
5+
filename = download_pathway_archive(date='current', organism='Mus musculus', format='gpml')
6+
assert filename == 'wikipathways-20240910-gpml-Mus_musculus.zip'
7+
8+
filename = download_pathway_archive(date='current', organism='Mus musculus', format='gmt')
9+
assert filename == 'wikipathways-20240910-gmt-Mus_musculus.gmt'
10+
11+
filename = download_pathway_archive(date='current', organism='Mus musculus', format='svg')
12+
assert filename == 'wikipathways-20240910-svg-Mus_musculus.zip'
13+

0 commit comments

Comments
 (0)