Fix issue #8 "can't download pathway archive" (#9)

kozo2 · web-flow · commit a0f95d8ef362 · 2024-09-23T17:56:57.000+09:00
* Use the new JSON-API #8
* Add docstring
* Add __init__.py to tests
* Add test_download_pathway_archive.py to tests
* Refactor download_pathway_archive.py to use new JSON-API endpoint
diff --git a/pywikipathways/download_pathway_archive.py b/pywikipathways/download_pathway_archive.py
@@ -6,6 +6,31 @@
 from .list_organisms import *
 
 def download_pathway_archive(date='current', organism=None, format='gpml', destpath='./'):
+    """Download Pathway Archive
+
+    Access the monthly archives of pathway content from WikiPathways.
+
+    If you do not specify an organism, then an archive file will not be downloaded.
+    Instead, the archive will be opened in a tab in your default browser.
+
+    Args:
+        date (str, optional): The timestamp for a monthly release (e.g., 20171010) 
+            or "current" (default) for the latest release.
+        organism (str, optional): A particular species. See `list_organisms`.
+        format (str, optional): Either "gpml" (default), "gmt", or "svg".
+        destpath (str, optional): Destination path for the file to be downloaded to. 
+            Default is the current working directory.
+
+    Returns:
+        str: Filename of the downloaded file or an opened tab in the default browser.
+
+    Examples:
+        >>> download_pathway_archive()  # open in browser
+        >>> download_pathway_archive(format="gmt")  # open in browser
+        >>> download_pathway_archive(date="20230710", format="svg")  # open in browser
+        >>> download_pathway_archive(date="20230710", organism="Mus musculus", format="svg")  # download file
+        >>> download_pathway_archive(organism="Mus musculus")  # download file
+    """
     # get validated format
     if not format in ['gpml', 'gmt', 'svg']:
         sys.exit(format + " is not in ['gpml', 'gmt', 'svg']. Please specify one of these.")
@@ -24,18 +49,16 @@ def download_pathway_archive(date='current', organism=None, format='gpml', destp
     # download specific file, or...
     if organism:
         if date == 'current':
-            curr_files = pandas.read_html("https://wikipathways-data.wmcloud.org/current/" + format)[0]['Filename']
-            filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))]
-            filename = list(filename)[0]
-            if not True in curr_files.str.contains(organism.replace(" ", "_")):
+            curr_files = pandas.read_html("https://data.wikipathways.org/current/" + format)[0]["File Name"]
+            filename = curr_files[curr_files.str.contains(organism.replace(" ", "_"))].iloc[0]
+            if len(filename) == 0:
                 sys.exit('Could not find a file matching your specifications. Try browsing http://data.wikipathways.org.')
         else:
-            if requests.get("https://wikipathways-data.wmcloud.org/" + date).ok:
-                ext = ".zip"
-                if format == 'gmt':
-                    ext = ".gmt"
-                filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
-        url = "/".join(['http://data.wikipathways.org', date, format, filename])
+            ext = ".zip"
+            if format == 'gmt':
+                ext = ".gmt"
+            filename = "-".join(['wikipathways', date, format, organism.replace(" ", "_")]) + ext
+        url = "/".join(['https://data.wikipathways.org', date, format, filename])
         r = requests.get(url)
         file = open(filename, "wb")
         file.write(r.content)
diff --git a/pywikipathways/list_organisms.py b/pywikipathways/list_organisms.py
@@ -1,5 +1,16 @@
-from .utilities import *
+import requests
 
 def list_organisms():
-    res = wikipathways_get('listOrganisms', {'format': 'json'})
-    return res['organisms']
+    """List Organisms.
+
+    Retrieve the list of organisms supported by WikiPathways.
+
+    Returns:
+        list: A list of organisms
+
+    Example:
+        >>> list_organisms()
+    """
+    res = requests.get("https://www.wikipathways.org/json/listOrganisms.json")
+    res.raise_for_status()
+    return res.json()['organisms']
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_download_pathway_archive.py b/tests/test_download_pathway_archive.py
@@ -0,0 +1,13 @@
+import pytest
+from pywikipathways.download_pathway_archive import *
+
+def test_successful_download():
+    filename = download_pathway_archive(date='current', organism='Mus musculus', format='gpml')
+    assert filename == 'wikipathways-20240910-gpml-Mus_musculus.zip'
+
+    filename = download_pathway_archive(date='current', organism='Mus musculus', format='gmt')
+    assert filename == 'wikipathways-20240910-gmt-Mus_musculus.gmt'
+
+    filename = download_pathway_archive(date='current', organism='Mus musculus', format='svg')
+    assert filename == 'wikipathways-20240910-svg-Mus_musculus.zip'
+